RubyGems - flay - Versions diffs - 1.0.0 → 1.1.0 - Mend

flay 1.0.0 → 1.1.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (7)

data/History.txt CHANGED

@@ -1,3 +1,16 @@
+=== 1.1.0 / 2009-01-20
+* 8 minor enhancement:
+  * Added -v verbose mode to print out N-way diff of the detected code.
+  * Added identical node scoring and reporting.
+  * Added the start of copy/paste+edit detection, not even close yet
+  * Added more tests.
+  * Added rcov tasks
+  * Added the start of copy/paste+edit detection
+  * Clarified output a bit
+  * Refactored process_sexps to make doing other languages/systems easier.
 === 1.0.0 / 2008-11-06
 * 1 major enhancement

data/README.txt CHANGED

@@ -15,29 +15,53 @@ style, braces vs do/end, etc are all ignored. Making this totally rad.
 * Differences in whitespace, programming style, braces vs do/end, etc are ignored.
 * Works across files.
 * Reports differences at any level of code.
+* Adds a score multiplier to identical nodes.
+* Run verbose to see an N-way diff of the code.
 == TODO:
 * Editor integration (emacs, textmate, other contributions welcome).
-* N-way diff reporting... or... something. Not sure.
 * UI improvement suggestions welcome. :)
+* Score sequence fragments (a;b;c;d;e) vs (b;c;d) etc.
 == SYNOPSIS:
-  % flay lib/*.rb
-  Processing unit/itemconfig.rb...
+  % flay -v ~/Work/svn/ruby/ruby_1_8/lib/cgi.rb
+  Processing /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb...
-  Matches found in :when (mass = 572)
-    unit/itemconfig.rb:343
-    unit/itemconfig.rb:379
-    unit/itemconfig.rb:706
-    unit/itemconfig.rb:742
+  Matches found in :defn (mass = 184)
+    A: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:1470
+    B: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:1925
-  Matches found in :when (mass = 500)
-    unit/itemconfig.rb:509
-    unit/itemconfig.rb:539
-    unit/itemconfig.rb:875
-    unit/itemconfig.rb:905
+  A: def checkbox_group(name = "", *values)
+  B: def radio_group(name = "", *values)
+       if name.kind_of?(Hash) then
+         values = name["VALUES"]
+         name = name["NAME"]
+       end
+       values.collect do |value|
+         if value.kind_of?(String) then
+  A:       (checkbox(name, value) + value)
+  B:       (radio_button(name, value) + value)
+         else
+           if (value[(value.size - 1)] == true) then
+  A:         (checkbox(name, value[0], true) + value[(value.size - 2)])
+  B:         (radio_button(name, value[0], true) + value[(value.size - 2)])
+           else
+  A:         (checkbox(name, value[0]) + value[(value.size - 1)])
+  B:         (radio_button(name, value[0]) + value[(value.size - 1)])
+           end
+         end
+       end.to_s
+     end
+  IDENTICAL Matches found in :for (mass*2 = 144)
+    A: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:2160
+    B: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:2217
+     for element in ["HTML", "BODY", "P", "DT", "DD", "LI", "OPTION", "THEAD", "TFOOT", "TBODY", "COLGROUP", "TR", "TH", "TD", "HEAD"] do
+       methods = (methods + (("          def #{element.downcase}(attributes = {})\n" + nO_element_def(element)) + "          end\n"))
+     end
   ...
 == REQUIREMENTS:

data/Rakefile CHANGED

@@ -16,4 +16,31 @@ Hoe.new('flay', Flay::VERSION) do |flay|
   flay.extra_deps << ['ruby_parser',    '>= 1.1.0']
 end
+begin
+  require 'rcov/rcovtask'
+  Rcov::RcovTask.new do |t|
+    pattern = ENV['PATTERN'] || 'test/test_*.rb'
+    t.test_files = FileList[pattern]
+    t.verbose = true
+    t.rcov_opts << "--threshold 80"
+    t.rcov_opts << "--no-color"
+  end
+  task :rcov_info do
+    pattern = ENV['PATTERN'] || "test/test_*.rb"
+    ruby "-Ilib -S rcov --text-report --save coverage.info -x rcov,sexp_processor --test-unit-only #{pattern}"
+  end
+  task :rcov_overlay do
+    rcov, eol = Marshal.load(File.read("coverage.info")).last[ENV["FILE"]], 1
+    puts rcov[:lines].zip(rcov[:coverage]).map { |line, coverage|
+      bol, eol = eol, eol + line.length
+      [bol, eol, "#ffcccc"] unless coverage
+    }.compact.inspect
+  end
+rescue LoadError
+  # skip
+end
 # vim: syntax=Ruby

data/bin/flay CHANGED

@@ -1,7 +1,5 @@
 #!/usr/bin/ruby -s
-$m ||= 16
 require 'flay'
 flay = Flay.new($m.to_i)

data/lib/flay.rb CHANGED

@@ -1,6 +1,5 @@
 #!/usr/bin/env ruby -w
-$: << "../../sexp_processor/dev/lib" # TODO: remove
 $: << "../../ruby_parser/dev/lib"
 require 'rubygems'
@@ -8,9 +7,20 @@ require 'sexp_processor'
 require 'ruby_parser'
 require 'pp' # TODO: remove
+$m ||= 16
+$v ||= false
+$f ||= false
+if $v then
+  $: << "../../ruby2ruby/dev/lib"
+  require 'ruby2ruby'
+  require 'tempfile'
+end
 class Flay
-  VERSION = '1.0.0'
+  VERSION = '1.1.0'
+  attr_accessor :mass_threshold
   attr_reader :hashes
   def initialize(mass = 16)
@@ -20,21 +30,60 @@ class Flay
   def process(*files)
     files.each do |file|
-      warn "Processing #{file}..."
+      warn "Processing #{file}"
-      t = Time.now
       pt = RubyParser.new.process(File.read(file), file)
       next unless pt # empty files... hahaha, suck.
-      t = Time.now
-      pt.deep_each do |node|
-        next unless node.any? { |sub| Sexp === sub }
-        next if node.mass < @mass_threshold
+      process_sexp pt
+    end
+    process_fuzzy_similarities if $f
+  end
+  def process_sexp pt
+    pt.deep_each do |node|
+      next unless node.any? { |sub| Sexp === sub }
+      next if node.mass < self.mass_threshold
-        self.hashes[node.fuzzy_hash] << node
+      self.hashes[node.fuzzy_hash] << node
+    end
+  end
+  def process_fuzzy_similarities
+    all_hashes, detected = {}, {}
+    self.hashes.values.each do |nodes|
+      nodes.each do |node|
+        next if node.mass > 4 * self.mass_threshold
+        # TODO: try out with fuzzy_hash
+        # all_hashes[node] = node.grep(Sexp).map { |s| [s.hash] * s.mass }.flatten
+        all_hashes[node] = node.grep(Sexp).map { |s| [s.hash] }.flatten
       end
     end
+    # warn "looking for copy/paste/edit code across #{all_hashes.size} nodes"
+    all_hashes = all_hashes.to_a
+    all_hashes.each_with_index do |(s1, h1), i|
+      similar = [s1]
+      all_hashes[i+1..-1].each do |(s2, h2)|
+        next if detected[h2]
+        intersection = h1.intersection h2
+        max = [h1.size, h2.size].max
+        if intersection.size >= max * 0.60 then
+          similarity = s1.similarity(s2)
+          if similarity > 0.60 then
+            similar << s2
+            detected[h2] = true
+          else
+            p [similarity, s1, s2]
+          end
+        end
+      end
+      self.hashes[similar.first.hash].push(*similar) if similar.size > 1
+    end
   end
   def prune
@@ -55,30 +104,88 @@ class Flay
     self.hashes.delete_if { |h,_| all_hashes[h] }
   end
+  def n_way_diff *data
+    data.each_with_index do |s, i|
+      c = (?A + i).chr
+      s.group = c
+    end
+    max = data.map { |s| s.scan(/^.*/).size }.max
+    data.map! { |s| # FIX: this is tarded, but I'm out of brain
+      c = s.group
+      s = s.scan(/^.*/)
+      s.push(*([""] * (max - s.size))) # pad
+      s.each do |o|
+        o.group = c
+      end
+      s
+    }
+    groups = data[0].zip(*data[1..-1])
+    groups.map! { |lines|
+      collapsed = lines.uniq
+      if collapsed.size == 1 then
+        "   #{lines.first}"
+      else
+        # TODO: make r2r have a canonical mode (doesn't make 1-liners)
+        lines.reject { |l| l.empty? }.map { |l| "#{l.group}: #{l}" }
+      end
+    }
+    groups.flatten.join("\n")
+  end
   def report prune = nil
     self.prune
-    self.hashes.sort_by { |_,nodes|
-      -(nodes.first.mass * nodes.size)
-    }.each do |_,nodes|
+    identical = {}
+    masses = {}
+    self.hashes.each do |hash,nodes|
+      identical[hash] = nodes[1..-1].all? { |n| n == nodes.first }
+      masses[hash] = nodes.first.mass * nodes.size
+      masses[hash] *= (nodes.size) if identical[hash]
+    end
+    count = 0
+    masses.sort_by { |h,m| [-m, hashes[h].first.file] }.each do |hash,mass|
+      nodes = hashes[hash]
       next unless nodes.first.first == prune if prune
       puts
+      same = identical[hash]
       node = nodes.first
-      puts "Matches found in %p (mass = %d)" %
-        [node.first, nodes.size * node.mass]
+      n = nodes.size
+      match, bonus = if same then
+                       ["IDENTICAL", "*#{n}"]
+                     else
+                       ["Similar",   ""]
+                     end
+      count += 1
+      puts "%d) %s code found in %p (mass%s = %d)" %
+        [count, match, node.first, bonus, mass]
+      nodes.each_with_index do |node, i|
+        if $v then
+          c = (?A + i).chr
+          puts "  #{c}: #{node.file}:#{node.line}"
+        else
+          puts "  #{node.file}:#{node.line}"
+        end
+      end
-      nodes.each do |node|
-        puts "  #{node.file}:#{node.line}"
+      if $v then
+        puts
+        r2r = Ruby2Ruby.new
+        puts n_way_diff(*nodes.map { |s| r2r.process(s.deep_clone) })
       end
     end
   end
 end
-class Symbol
-  def hash
-    @hash ||= self.to_s.hash
-  end
+class String
+  attr_accessor :group
 end
 class Sexp
@@ -108,6 +215,7 @@ class Sexp
     # TODO: I think this is wrong, since it isn't positional. What to do?
     l_sexp.zip(r_sexp).each do |l_sub, r_sub|
+      next unless l_sub && r_sub # HACK
       l2, s2, r2 = l_sub.compare_to r_sub
       l += l2
       s += s2
@@ -143,16 +251,33 @@ class Sexp
       yield sexp
     end
   end
+end
-  alias :old_inspect :inspect
-  def inspect
-    old_inspect.sub(/\)\Z/, ":h_#{self.fuzzy_hash})")
+class Array
+  def intersection other
+    intersection, start = [], 0
+    other_size = other.length
+    self.each_with_index do |m, i|
+      (start...other_size).each do |j|
+        n = other.at j
+        if m == n then
+          intersection << m
+          start = j + 1
+          break
+        end
+      end
+    end
+    intersection
   end
-  alias :shut_up! :pretty_print
-  def pretty_print(q) # shows the hash TODO: remove
-    q.group(1, 'S(', ')') do
-      q.seplist(self + [":h_#{self.fuzzy_hash}"]) {|v| q.pp v }
+  def triangle # TODO: use?
+    max = self.size
+    (0...max).each do |i|
+      o1 = at(i)
+      (i+1...max).each do |j|
+        o2 = at(j)
+        yield o1, o2
+      end
     end
   end
 end

data/test/test_flay.rb CHANGED

@@ -3,7 +3,19 @@
 require 'test/unit'
 require 'flay'
-class SexpTest < Test::Unit::TestCase
+require 'pp' # TODO: remove
+class Symbol # for testing only, makes the tests concrete
+  def hash
+    to_s.hash
+  end
+  def <=> o
+    Symbol === o && self.to_s <=> o.to_s
+  end
+end
+class TestSexp < Test::Unit::TestCase
   def setup
     # a(1) { |c| d }
     @s = s(:iter,
@@ -57,10 +69,9 @@ class SexpTest < Test::Unit::TestCase
   def test_all_subhashes
     expected = [-704571402, -282578980, -35395725,
-                160138040, 815971090, 927228382]
+                160138040, 815971090, 927228382] # , 955256285]
     assert_equal expected, @s.all_subhashes.sort.uniq
-    assert ! @s.all_subhashes.include?(@s.fuzzy_hash)
     x = []
@@ -71,4 +82,135 @@ class SexpTest < Test::Unit::TestCase
     assert_equal expected, x.sort.uniq
   end
+  def test_process_sexp
+    flay = Flay.new
+    s = RubyParser.new.process <<-RUBY
+      def x(n)
+        if n % 2 == 0
+          return n
+        else
+          return n + 1
+        end
+      end
+    RUBY
+    expected = [[:block],
+                # HACK [:defn],
+                [:scope]] # only ones big enough
+    flay.process_sexp s
+    actual = flay.hashes.values.map { |sexps| sexps.map { |sexp| sexp.first } }
+    assert_equal expected, actual.sort_by { |a| a.first.to_s }
+  end
+  def test_process_sexp_full
+    flay = Flay.new(1)
+    s = RubyParser.new.process <<-RUBY
+      def x(n)
+        if n % 2 == 0
+          return n
+        else
+          return n + 1
+        end
+      end
+    RUBY
+    expected = [[:arglist, :arglist, :arglist],
+                [:block],
+                [:call, :call],
+                [:call],
+                # HACK [:defn],
+                [:if],
+                [:return],
+                [:return],
+                [:scope]]
+    flay.process_sexp s
+    actual = flay.hashes.values.map { |sexps| sexps.map { |sexp| sexp.first } }
+    assert_equal expected, actual.sort_by { |a| a.first.to_s }
+  end
+  def test_process_sexp_no_structure
+    flay = Flay.new(1)
+    flay.process_sexp s(:lit, 1)
+    assert flay.hashes.empty?
+  end
+  def test_process_fuzzy_similarities
+    flay = Flay.new 7
+    s1 = RubyParser.new.process("def w(n); a; b; c; d; e; end")
+    s2 = RubyParser.new.process("def x(n); a;    c;    e; end")
+    flay.process_sexp s1
+    flay.process_sexp s2
+    flay.process_fuzzy_similarities
+    b1 = s1.scope.block
+    b2 = s2.scope.block
+    assert_equal [b2, b1], flay.hashes[b2.hash]
+  end
+  def test_process_fuzzy_similarities_2
+    flay = Flay.new 7
+    s1 = RubyParser.new.process("def w(n); a; b; c; d; e; end")
+    s2 = RubyParser.new.process("def x(n); a;    c;    e; end")
+    s3 = RubyParser.new.process("def y(n); a; f; c; g; e; end")
+    flay.process_sexp s1
+    flay.process_sexp s2
+    flay.process_sexp s3
+    flay.process_fuzzy_similarities
+    b1 = s1.scope.block
+    b2 = s2.scope.block
+    b3 = s3.scope.block
+    assert_equal [b3, b2, b1], flay.hashes[b3.hash]
+  end
+  def test_process_fuzzy_similarities_3
+    flay = Flay.new 7
+    s1 = RubyParser.new.process("def w (n); a; b;      c; d;      e; end")
+    s2 = RubyParser.new.process("def x (n); a;         c;         e; end")
+    s3 = RubyParser.new.process("def y (n); a; f;      c; g;      e; end")
+    s4 = RubyParser.new.process("def z (n); f; g;      h; i;      j; end")
+    s5 = RubyParser.new.process("def w1(n); a; b if x; c; d if y; e; end")
+    flay.process_sexp s1
+    flay.process_sexp s2
+    flay.process_sexp s3
+    flay.process_sexp s4
+    flay.process_sexp s5
+    flay.process_fuzzy_similarities
+    b1 = s1.scope.block
+    b2 = s2.scope.block
+    b3 = s3.scope.block
+    b5 = s5.scope.block
+    assert_equal [b3, b5, b2, b1], flay.hashes[b3.hash]
+  end
+end
+class ArrayIntersectionTests < Test::Unit::TestCase
+  def test_real_array_intersection
+    assert_equal [2], [2, 2, 2, 3, 7, 13, 49] & [2, 2, 2, 5, 11, 107]
+    assert_equal [2, 2, 2], [2, 2, 2, 3, 7, 13, 49].intersection([2, 2, 2, 5, 11, 107])
+    assert_equal ['a', 'c'], ['a', 'b', 'a', 'c'] & ['a', 'c', 'a', 'd']
+    assert_equal ['a', 'a'], ['a', 'b', 'a', 'c'].intersection(['a', 'c', 'a', 'd'])
+  end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: flay
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.1.0
 platform: ruby
 authors:
 - Ryan Davis
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2008-11-06 00:00:00 -05:00
+date: 2009-01-20 00:00:00 -08:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency