RubyGems - doppelganger - Versions diffs - 0.8.0 - Mend

doppelganger 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

data/CHANGELOG +31 -0
data/LICENSE +22 -0
data/Manifest.txt +32 -0
data/README.rdoc +67 -0
data/Rakefile +32 -0
data/bin/doppelganger +189 -0
data/doppelganger.gemspec +53 -0
data/lib/doppelganger.rb +63 -0
data/lib/doppelganger/extractor.rb +105 -0
data/lib/doppelganger/exts/array.rb +23 -0
data/lib/doppelganger/exts/sexp.rb +89 -0
data/lib/doppelganger/node_analysis.rb +121 -0
data/lib/doppelganger/unified_ruby.rb +358 -0
data/tasks/bones.rake +20 -0
data/tasks/gem.rake +192 -0
data/tasks/git.rake +41 -0
data/tasks/manifest.rake +48 -0
data/tasks/post_load.rake +38 -0
data/tasks/rdoc.rake +48 -0
data/tasks/rubyforge.rake +55 -0
data/tasks/setup.rb +239 -0
data/tasks/test.rake +34 -0
data/test/array_test.rb +16 -0
data/test/doppelganger_test.rb +112 -0
data/test/sample_files/duplicate_test_data/first_file.rb +7 -0
data/test/sample_files/duplicate_test_data/second_file.rb +7 -0
data/test/sample_files/larger_diff/first_file.rb +7 -0
data/test/sample_files/larger_diff/second_file.rb +7 -0
data/test/sample_files/repeats_removal_sample_file.rb +94 -0
data/test/sample_files/sexp_test_file.rb +7 -0
data/test/sexp_ext_test.rb +53 -0
data/test/test_helper.rb +79 -0
metadata +158 -0

data/lib/doppelganger/extractor.rb ADDED

@@ -0,0 +1,105 @@
+require "#{Doppelganger::LIBPATH}doppelganger/unified_ruby"
+MethodDef = Struct.new(:name, :args, :body, :node, :filename, :line, :flat_body_array, :last_line)
+BlockNode = Struct.new(:body, :node, :filename, :line, :flat_body_array, :last_line)
+IterNode = Struct.new(:call_node, :asgn_node, :body, :node, :filename, :line, :flat_body_array, :last_line)
+module Doppelganger
+  # This class goes through all the ruby files in a directory and parses it into Sexp's.
+  # It then exracts the definitions and stores them all and then includes the NodeAnalysis module
+  # with allows a number of comparisons.
+  class Extractor < SexpProcessor
+    include UnifiedRuby
+    attr_reader :sexp_blocks, :dir
+    def initialize
+      super
+      self.auto_shift_type = true
+      @rp = RubyParser.new
+      @sexp_blocks = []
+    end
+    # This goes through all the files in the directory and parses them extracting
+    # all the block-like nodes.
+    def extract_blocks(dir)
+      @dir = File.expand_path(dir)
+      if File.directory? @dir
+        Find.find(*Dir["#{self.dir}/**/*.rb"]) do |filename|
+          if File.file? filename
+            sexp = @rp.process(File.read(filename), filename)
+            self.process(sexp)
+          end
+        end
+      elsif File.file? @dir
+        sexp = @rp.process(File.read(@dir), @dir)
+        self.process(sexp)
+      end
+      @sexp_blocks
+    end
+    def process_defn(exp)
+      method = MethodDef.new
+      method.name = exp.shift
+      method.args = process(exp.shift)
+      method.last_line = exp.last_line_number
+      method.body = process(exp.shift)
+      method.node = s(:defn, method.name, method.args, method.body.dup)
+      method.flat_body_array = method.body.dup.remove_literals.to_flat_ary
+      method.filename = exp.file
+      method.line = exp.line
+      unless method.body == s(:scope, s(:block, s(:nil)))
+        @sexp_blocks << method
+      end
+      method.node
+    end
+    def process_block(exp)
+      block_node = BlockNode.new
+      block_node.last_line = exp.last_line_number
+      if exp.size > 1
+        block_node.body = s()
+        until (exp.empty?) do
+          block_node.body << process(exp.shift)
+        end
+        block_node.node = s(:block, *block_node.body.dup)
+      else
+        block_node.body = exp.shift
+        block_node.node = s(:block, block_node.body.dup)
+      end
+      block_node.flat_body_array = block_node.body.dup.remove_literals.to_flat_ary
+      block_node.filename = exp.file
+      block_node.line = exp.line
+      unless block_node.body == s(:nil)
+        @sexp_blocks << block_node
+      end
+      block_node.node
+    end
+    def process_iter(exp)
+      unless exp[2][0] == :block
+        iter_node = IterNode.new
+        iter_node.call_node = process(exp.shift)
+        iter_node.asgn_node = process(exp.shift)
+        iter_node.last_line = exp.last_line_number
+        iter_node.body = process(exp.shift)
+        iter_node.node = s(:iter, iter_node.call_node, iter_node.asgn_node, iter_node.body.dup)
+        iter_node.flat_body_array = iter_node.body.dup.remove_literals.to_flat_ary
+        iter_node.filename = exp.file
+        iter_node.line = exp.line
+        @sexp_blocks << iter_node
+        iter_node.node
+      else
+        call_node = process(exp.shift)
+        asgn_node = process(exp.shift)
+        body = process(exp.shift)
+        s(:iter, call_node, asgn_node, body)
+      end
+    end
+  end
+end

data/lib/doppelganger/exts/array.rb ADDED

@@ -0,0 +1,23 @@
+class Array
+  # Return all duplicate elments (uses <tt>==</tt> for comparison).
+  def duplicates?(element)
+    (self.select {|elem| elem == element}).size > 1
+  end
+  def stepwise(compare_method) #:nodoc:
+    self.each do |element1|
+      self.each do |element2|
+        next if element1.send(compare_method) == element2.send(compare_method)
+        yield element1, element2
+      end
+    end
+  end
+  def comparing_collect #:nodoc:
+    accumulator = [] # collect implementation copied from Rubinius
+    stepwise do |element1, element2|
+      accumulator << element1 if yield(element1, element2)
+    end
+    accumulator.compact.uniq
+  end
+end

data/lib/doppelganger/exts/sexp.rb ADDED

@@ -0,0 +1,89 @@
+# This is pulled in part from Ryan Davis' Sexp additions in Flay.
+class Sexp
+  # Performs the block on every Sexp in this sexp.
+  def deep_each(&block)
+    self.each_sexp do |sexp|
+      block[sexp]
+      sexp.deep_each(&block)
+    end
+  end
+  # Finds the last line of the Sexp if that information is available.
+  def last_line_number
+    line_number = nil
+    self.deep_each do |sub_node|
+      if sub_node.respond_to? :line
+        line_number = sub_node.line
+      end
+    end
+    line_number
+  end
+  # Maps all sub Sexps into a new Sexp, if the node isn't a Sexp
+  # performs the block and maps the result into the new Sexp.
+  def map_sexps
+    self.inject(s()) do |sexps, sexp|
+      unless Sexp === sexp
+        sexps << sexp
+      else
+        sexps << yield(sexp)
+      end
+      sexps
+    end
+  end
+  # Rejects all objects in the Sexp that return true for the block.
+  def deep_reject(&block)
+    output_sexp = self.reject do |node|
+      block[node]
+    end
+    output_sexp.map_sexps do |sexp|
+      sexp.deep_reject(&block)
+    end
+  end
+  # Removes all literals from the Sexp (Symbols aren't excluded as they are used internally
+  # by Sexp for node names which identifies structure important for comparison.)
+  def remove_literals
+    self.deep_reject do |node|
+      !((node.is_a?(Symbol)) || (node.is_a?(Sexp)))
+    end
+  end
+  # Iterates through each child Sexp of the current Sexp.
+  def each_sexp
+    self.each do |sexp|
+      next unless Sexp === sexp
+      yield sexp
+    end
+  end
+  # Performs the block on every Sexp in this sexp, looking for one that returns true.
+  def deep_any?(&block)
+    self.any_sexp? do |sexp|
+      block[sexp] || sexp.deep_any?(&block)
+    end
+  end
+  # Iterates through each child Sexp of the current Sexp and looks for any Sexp
+  # that returns true for the block.
+  def any_sexp?
+    self.any? do |sexp|
+      next unless Sexp === sexp
+      yield sexp
+    end
+  end
+  # Determines if the passed in block node is contained with in the Sexp node.
+  def contains_block?(block_node)
+    self.deep_any? do |sexp|
+      sexp == block_node
+    end
+  end
+  # First turns the Sexp into an Array then flattens it.
+  def to_flat_ary
+    self.to_a.flatten
+  end
+end

data/lib/doppelganger/node_analysis.rb ADDED

@@ -0,0 +1,121 @@
+module Doppelganger
+  # This handles the comparison of the Ruby nodes.
+  #
+  # This will use various iterators to compare all the diffent block-like nodes
+  # in your code base and find similar or duplicate nodes.
+  class NodeAnalysis
+    attr_accessor :sexp_blocks
+    def initialize(sexp_blocks)
+      @sexp_blocks = sexp_blocks
+    end
+    # Are there any duplicates in the code base.
+    def duplication?
+      not duplicates.empty?
+    end
+    # Finds blocks of code that are exact duplicates, node for node. All duplicate
+    # blocks are grouped together.
+    def duplicates
+      block_nodes = @sexp_blocks.map{ |sblock| sblock.body.remove_literals }
+      (@sexp_blocks.inject([]) do |duplicate_blocks, sblock|
+        node_body = sblock.body.remove_literals
+        if block_nodes.duplicates?(node_body)
+          if duplicate_blocks.map{|sb| sb.first.body.remove_literals}.include?(node_body)
+            duplicate_blocks.find{|sb| sb.first.body.remove_literals == node_body } << sblock
+          else
+            duplicate_blocks << [sblock]
+          end
+        end
+        duplicate_blocks
+      end).compact.uniq
+    end
+    # Finds block-like nodes that differ from another node by the threshold or less, but are not duplicates.
+    def diff(threshold, progress_bar = nil)
+      diff_nodes = []
+      @compared_node_pairs = []
+      stepwise_sblocks(progress_bar) do |block_node_1, block_node_2|
+        if threshold >= Diff::LCS.diff(block_node_1.flat_body_array, block_node_2.flat_body_array).size
+          diff_nodes << [block_node_1, block_node_2]
+        end
+        @compared_node_pairs << [block_node_1, block_node_2]
+      end
+      @compared_node_pairs = []
+      cleanup_descendant_duplicate_matches(diff_nodes)
+    end
+    # Finds block-like nodes that differ by a given threshold percentage or less, but are not duplicates.
+    def percent_diff(percentage, progress_bar = nil)
+      # To calculate the percentage we can do this in one of two ways we can compare
+      # total differences (the diff set flattened) over the total nodes (the flattened bodies added)
+      # or we can compare the number of change sets (the size of the diff) over the average number of nodes
+      # in the two methods.
+      # Not sure which is best but I've gone with the former for now.
+      diff_nodes = []
+      @compared_node_pairs = []
+      stepwise_sblocks(progress_bar) do |block_node_1, block_node_2|
+        total_nodes = block_node_1.flat_body_array.size + block_node_2.flat_body_array.size
+        diff_size = Diff::LCS.diff(block_node_1.flat_body_array, block_node_2.flat_body_array).flatten.size
+        if percentage >= (diff_size.to_f/total_nodes.to_f * 100)
+          diff_nodes << [block_node_1, block_node_2]
+        end
+        @compared_node_pairs << [block_node_1, block_node_2]
+      end
+      @compared_node_pairs = []
+      cleanup_descendant_duplicate_matches(diff_nodes)
+    end
+    protected
+      def stepwise_sblocks(progress_bar = nil)
+        @sexp_blocks.dup.each do |node1|
+          @sexp_blocks.dup.each do |node2|
+            progress_bar.inc unless progress_bar.nil?
+            next if nodes_compared? node1, node2
+            next if node1.body.remove_literals == node2.body.remove_literals
+            next if one_node_is_child_of_the_other? node1, node2
+            yield node1, node2
+          end
+        end
+      end
+      def node_includes_block?(element, block_node)
+        (element.filename == block_node.filename) &&
+          ((element.line..(element.last_line+1)).include?(block_node.line) ||
+          element.node.contains_block?(block_node.node))
+      end
+      def cleanup_descendant_duplicate_matches(diff_nodes)
+        diff_nodes.reject do |block_node_pair|
+          ancestor_pair_in_results?(block_node_pair, diff_nodes)
+        end
+      end
+      def ancestor_pair_in_results?(pair, results)
+        matches = results.select do |block_node_pair|
+          block_node_pair.any?{|n| node_includes_block?(n, pair.first)} &&
+            block_node_pair.any?{|n| node_includes_block?(n, pair.last)}
+        end
+        matches.size > 1
+      end
+      def one_node_is_child_of_the_other?(node1, node2)
+        if node1.is_a?(MethodDef) && node2.is_a?(BlockNode)
+          (node_includes_block?(node1, node2))
+        elsif node1.is_a?(BlockNode) && node2.is_a?(MethodDef)
+          (node_includes_block?(node2, node1))
+        else
+          (node_includes_block?(node1, node2) || node_includes_block?(node2, node1))
+        end
+      end
+      def nodes_compared?(node1, node2)
+        @compared_node_pairs.any? do |block_node_pair|
+          block_pair_nodes = block_node_pair.map(&:node)
+          block_pair_nodes.include?(node1.node) && block_pair_nodes.include?(node2.node)
+        end
+      end
+  end
+end

data/lib/doppelganger/unified_ruby.rb ADDED

@@ -0,0 +1,358 @@
+# This is copied from ParseTree, but with some of the "raise" statements
+# removed. The rewrite_masgn method has also been removed because masgn is
+# used in many places that do not meet the requirements of the first
+# if/raise line of that method.
+$TESTING ||= false
+module UnifiedRuby
+  def process exp
+    exp = Sexp.from_array exp unless Sexp === exp or exp.nil?
+    super
+  end
+  def rewrite_argscat exp
+    _, ary, val = exp
+    ary = s(:array, ary) unless ary.first == :array
+    ary << s(:splat, val)
+  end
+  def rewrite_argspush exp
+    exp[0] = :arglist
+    exp
+  end
+  def rewrite_attrasgn(exp)
+    last = exp.last
+    if Sexp === last then
+      last[0] = :arglist if last[0] == :array
+    else
+      exp << s(:arglist)
+    end
+    exp
+  end
+  def rewrite_begin(exp)
+    if exp.size > 2
+      exp
+    else
+      exp.last
+    end
+  end
+  def rewrite_block_pass exp
+    if exp.size == 3 then
+      _, block, recv = exp
+      case recv.first
+      when :super then
+        recv << s(:block_pass, block)
+        exp = recv
+      when :call then
+        recv.last << s(:block_pass, block)
+        exp = recv
+      else
+        exp
+      end
+    end
+    exp
+  end
+  def rewrite_bmethod(exp)
+    _, args, body = exp
+    args ||= s(:array)
+    body ||= s(:block)
+    args = s(:args, args) unless args[0] == :array
+    args = args[1] if args[1] && args[1][0] == :masgn # TODO: clean up
+    args = args[1] if args[1] && args[1][0] == :array
+    args[0] = :args
+    # this is ugly because rewriters are depth first.
+    # TODO: maybe we could come up with some way to do both forms of rewriting.
+    args.map! { |s|
+      if Sexp === s
+        case s[0]
+        when :lasgn then
+          s[1]
+        when :splat then
+          :"*#{s[1][1]}"
+        else
+          raise "huh?: #{s.inspect}"
+        end
+      else
+        s
+      end
+    }
+    body = s(:block, body) unless body[0] == :block
+    body.insert 1, args
+    s(:scope, body)
+  end
+  def rewrite_call(exp)
+    args = exp.last
+    case args
+    when nil
+      exp.pop
+    when Array
+      case args.first
+      when :array, :arglist then
+        args[0] = :arglist
+      when :argscat, :splat then
+        exp[-1] = s(:arglist, args)
+      else
+        raise "unknown type in call #{args.first.inspect} in #{exp.inspect}"
+      end
+      return exp
+    end
+    exp << s(:arglist)
+    exp
+  end
+  def rewrite_dasgn(exp)
+    exp[0] = :lasgn
+    exp
+  end
+  alias :rewrite_dasgn_curr :rewrite_dasgn
+  ##
+  # :defn is one of the most complex of all the ASTs in ruby. We do
+  # one of 3 different translations:
+  #
+  # 1) From:
+  #
+  #   s(:defn, :name, s(:scope, s(:block, s(:args, ...), ...)))
+  #   s(:defn, :name, s(:bmethod, s(:masgn, s(:dasgn_curr, :args)), s(:block, ...)))
+  #   s(:defn, :name, s(:fbody, s(:bmethod, s(:masgn, s(:dasgn_curr, :splat)), s(:block, ...))))
+  #
+  # to:
+  #
+  #   s(:defn, :name, s(:args, ...), s(:scope, s:(block, ...)))
+  #
+  # 2) From:
+  #
+  #   s(:defn, :writer=, s(:attrset, :@name))
+  #
+  # to:
+  #
+  #   s(:defn, :writer=, s(:args), s(:attrset, :@name))
+  #
+  # 3) From:
+  #
+  #   s(:defn, :reader, s(:ivar, :@name))
+  #
+  # to:
+  #
+  #   s(:defn, :reader, s(:args), s(:ivar, :@name))
+  #
+  def rewrite_defn(exp)
+    weirdo = exp.ivar || exp.attrset
+    fbody  = exp.fbody(true)
+    weirdo ||= fbody.cfunc if fbody
+    exp.push(fbody.scope) if fbody unless weirdo
+    args = exp.scope.block.args(true) unless weirdo
+    exp.insert 2, args if args
+    # move block_arg up and in
+    block_arg = exp.scope.block.block_arg(true) rescue nil
+    if block_arg
+      block = args.block(true)
+      args << :"&#{block_arg.last}"
+      args << block if block
+    end
+    # patch up attr_accessor methods
+    if weirdo then
+      case
+      when fbody && fbody.cfunc then
+        exp.insert 2, s(:args, :"*args")
+      when exp.ivar then
+        exp.insert 2, s(:args)
+      when exp.attrset then
+        exp.insert 2, s(:args, :arg)
+      else
+        raise "unknown wierdo: #{wierdo.inpsect}"
+      end
+    end
+    exp
+  end
+  def rewrite_defs(exp)
+    receiver = exp.delete_at 1
+    # TODO: I think this would be better as rewrite_scope, but that breaks others
+    exp = s(exp.shift, exp.shift,
+            s(:scope,
+              s(:block, exp.scope.args))) if exp.scope && exp.scope.args
+    result = rewrite_defn(exp)
+    result.insert 1, receiver
+    result
+  end
+  def rewrite_dmethod(exp)
+    exp.shift # type
+    exp.shift # dmethod name
+    exp.shift # scope / block / body
+  end
+  def rewrite_dvar(exp)
+    exp[0] = :lvar
+    exp
+  end
+  def rewrite_fcall(exp)
+    exp[0] = :call
+    exp.insert 1, nil
+    rewrite_call(exp)
+  end
+  def rewrite_op_asgn1(exp)
+    exp[2][0] = :arglist # if exp[2][0] == :array
+    exp
+  end
+  def rewrite_resbody(exp)
+    exp[1] ||= s(:array)        # no args
+    body = exp[2]
+    if body then
+      case body.first
+      when :lasgn, :iasgn then
+        exp[1] << exp.delete_at(2) if body[-1] == s(:gvar, :$!)
+      when :block then
+        exp[1] << body.delete_at(1) if [:lasgn, :iasgn].include?(body[1][0]) &&
+          body[1][-1] == s(:gvar, :$!)
+      end
+    end
+    exp << nil if exp.size == 2 # no body
+    exp
+  end
+  def rewrite_rescue(exp)
+    # SKETCHY HACK return exp if exp.size > 4
+    ignored = exp.shift
+    body    = exp.shift unless exp.first.first == :resbody
+    resbody = exp.shift
+    els     = exp.shift unless exp.first.first == :resbody unless exp.empty?
+    rest    = exp.empty? ? nil : exp # graceful re-rewriting (see rewrite_begin)
+    resbodies = []
+    unless rest then
+      while resbody do
+        resbodies << resbody
+        resbody = resbody.resbody(true)
+      end
+      resbodies.each do |resbody|
+        if resbody[2] && resbody[2][0] == :block && resbody[2].size == 2 then
+          resbody[2] = resbody[2][-1]
+        end
+      end
+    else
+      resbodies = [resbody] + rest
+    end
+    resbodies << els if els
+    s(:rescue, body, *resbodies).compact
+  end
+  def rewrite_splat(exp)
+    good = [:arglist, :argspush, :array, :svalue, :yield, :super].include? context.first
+    exp = s(:array, exp) unless good
+    exp
+  end
+  def rewrite_super(exp)
+    return exp if exp.structure.flatten.first(3) == [:super, :array, :splat]
+    exp.push(*exp.pop[1..-1]) if exp.size == 2 && exp.last.first == :array
+    exp
+  end
+  def rewrite_vcall(exp)
+    exp.push nil
+    rewrite_fcall(exp)
+  end
+  def rewrite_yield(exp)
+    real_array = exp.pop if exp.size == 3
+    if exp.size == 2 then
+      if real_array then
+        exp[-1] = s(:array, exp[-1]) if exp[-1][0] != :array
+      else
+        exp.push(*exp.pop[1..-1]) if exp.last.first == :array
+      end
+    end
+    exp
+  end
+  def rewrite_zarray(exp)
+    exp[0] = :array
+    exp
+  end
+end
+class PreUnifier < SexpProcessor
+  def initialize
+    super
+    @unsupported.delete :newline
+  end
+  def rewrite_call exp
+    exp << s(:arglist) if exp.size < 4
+    exp.last[0] = :arglist if exp.last.first == :array
+    exp
+  end
+  def rewrite_fcall exp
+    exp << s(:arglist) if exp.size < 3
+    if exp[-1][0] == :array then
+      has_splat = exp[-1].find { |s| Array === s && s.first == :splat }
+      exp[-1] = s(:arglist, exp[-1]) if has_splat
+      exp[-1][0] = :arglist
+    end
+    exp
+  end
+end
+# SexpProcessor that applies the UnifiedRuby rewrite rules.
+class PostUnifier < SexpProcessor
+  include UnifiedRuby
+  def initialize
+    super
+    # :newline is removed from the list of unsupported node types.
+    @unsupported.delete :newline
+  end
+end
+##
+# Quick and easy SexpProcessor that unified the sexp structure.
+class Unifier < CompositeSexpProcessor
+  def initialize
+    super
+    self << PreUnifier.new
+    self << PostUnifier.new
+  end
+end