flay 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (7)
  1. data/History.txt +13 -0
  2. data/README.txt +37 -13
  3. data/Rakefile +27 -0
  4. data/bin/flay +0 -2
  5. data/lib/flay.rb +153 -28
  6. data/test/test_flay.rb +145 -3
  7. metadata +2 -2
@@ -1,3 +1,16 @@
1
+ === 1.1.0 / 2009-01-20
2
+
3
+ * 8 minor enhancements:
4
+
5
+ * Added -v verbose mode to print out N-way diff of the detected code.
6
+ * Added identical node scoring and reporting.
7
+ * Added the start of copy/paste+edit detection, not even close yet
8
+ * Added more tests.
9
+ * Added rcov tasks
10
+ * Added the start of copy/paste+edit detection
11
+ * Clarified output a bit
12
+ * Refactored process_sexp to make supporting other languages/systems easier.
13
+
1
14
  === 1.0.0 / 2008-11-06
2
15
 
3
16
  * 1 major enhancement
data/README.txt CHANGED
@@ -15,29 +15,53 @@ style, braces vs do/end, etc are all ignored. Making this totally rad.
15
15
  * Differences in whitespace, programming style, braces vs do/end, etc are ignored.
16
16
  * Works across files.
17
17
  * Reports differences at any level of code.
18
+ * Adds a score multiplier to identical nodes.
19
+ * Run verbose to see an N-way diff of the code.
18
20
 
19
21
  == TODO:
20
22
 
21
23
  * Editor integration (emacs, textmate, other contributions welcome).
22
- * N-way diff reporting... or... something. Not sure.
23
24
  * UI improvement suggestions welcome. :)
25
+ * Score sequence fragments (a;b;c;d;e) vs (b;c;d) etc.
24
26
 
25
27
  == SYNOPSIS:
26
28
 
27
- % flay lib/*.rb
28
- Processing unit/itemconfig.rb...
29
+ % flay -v ~/Work/svn/ruby/ruby_1_8/lib/cgi.rb
30
+ Processing /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb...
29
31
 
30
- Matches found in :when (mass = 572)
31
- unit/itemconfig.rb:343
32
- unit/itemconfig.rb:379
33
- unit/itemconfig.rb:706
34
- unit/itemconfig.rb:742
32
+ Matches found in :defn (mass = 184)
33
+ A: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:1470
34
+ B: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:1925
35
35
 
36
- Matches found in :when (mass = 500)
37
- unit/itemconfig.rb:509
38
- unit/itemconfig.rb:539
39
- unit/itemconfig.rb:875
40
- unit/itemconfig.rb:905
36
+ A: def checkbox_group(name = "", *values)
37
+ B: def radio_group(name = "", *values)
38
+ if name.kind_of?(Hash) then
39
+ values = name["VALUES"]
40
+ name = name["NAME"]
41
+ end
42
+ values.collect do |value|
43
+ if value.kind_of?(String) then
44
+ A: (checkbox(name, value) + value)
45
+ B: (radio_button(name, value) + value)
46
+ else
47
+ if (value[(value.size - 1)] == true) then
48
+ A: (checkbox(name, value[0], true) + value[(value.size - 2)])
49
+ B: (radio_button(name, value[0], true) + value[(value.size - 2)])
50
+ else
51
+ A: (checkbox(name, value[0]) + value[(value.size - 1)])
52
+ B: (radio_button(name, value[0]) + value[(value.size - 1)])
53
+ end
54
+ end
55
+ end.to_s
56
+ end
57
+
58
+ IDENTICAL Matches found in :for (mass*2 = 144)
59
+ A: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:2160
60
+ B: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:2217
61
+
62
+ for element in ["HTML", "BODY", "P", "DT", "DD", "LI", "OPTION", "THEAD", "TFOOT", "TBODY", "COLGROUP", "TR", "TH", "TD", "HEAD"] do
63
+ methods = (methods + ((" def #{element.downcase}(attributes = {})\n" + nO_element_def(element)) + " end\n"))
64
+ end
41
65
  ...
42
66
 
43
67
  == REQUIREMENTS:
data/Rakefile CHANGED
@@ -16,4 +16,31 @@ Hoe.new('flay', Flay::VERSION) do |flay|
16
16
  flay.extra_deps << ['ruby_parser', '>= 1.1.0']
17
17
  end
18
18
 
19
+ begin
20
+ require 'rcov/rcovtask'
21
+ Rcov::RcovTask.new do |t|
22
+ pattern = ENV['PATTERN'] || 'test/test_*.rb'
23
+
24
+ t.test_files = FileList[pattern]
25
+ t.verbose = true
26
+ t.rcov_opts << "--threshold 80"
27
+ t.rcov_opts << "--no-color"
28
+ end
29
+
30
+ task :rcov_info do
31
+ pattern = ENV['PATTERN'] || "test/test_*.rb"
32
+ ruby "-Ilib -S rcov --text-report --save coverage.info -x rcov,sexp_processor --test-unit-only #{pattern}"
33
+ end
34
+
35
+ task :rcov_overlay do
36
+ rcov, eol = Marshal.load(File.read("coverage.info")).last[ENV["FILE"]], 1
37
+ puts rcov[:lines].zip(rcov[:coverage]).map { |line, coverage|
38
+ bol, eol = eol, eol + line.length
39
+ [bol, eol, "#ffcccc"] unless coverage
40
+ }.compact.inspect
41
+ end
42
+ rescue LoadError
43
+ # skip
44
+ end
45
+
19
46
  # vim: syntax=Ruby
data/bin/flay CHANGED
@@ -1,7 +1,5 @@
1
1
  #!/usr/bin/ruby -s
2
2
 
3
- $m ||= 16
4
-
5
3
  require 'flay'
6
4
 
7
5
  flay = Flay.new($m.to_i)
@@ -1,6 +1,5 @@
1
1
  #!/usr/bin/env ruby -w
2
2
 
3
- $: << "../../sexp_processor/dev/lib" # TODO: remove
4
3
  $: << "../../ruby_parser/dev/lib"
5
4
 
6
5
  require 'rubygems'
@@ -8,9 +7,20 @@ require 'sexp_processor'
8
7
  require 'ruby_parser'
9
8
  require 'pp' # TODO: remove
10
9
 
10
+ $m ||= 16
11
+ $v ||= false
12
+ $f ||= false
13
+
14
+ if $v then
15
+ $: << "../../ruby2ruby/dev/lib"
16
+ require 'ruby2ruby'
17
+ require 'tempfile'
18
+ end
19
+
11
20
  class Flay
12
- VERSION = '1.0.0'
21
+ VERSION = '1.1.0'
13
22
 
23
+ attr_accessor :mass_threshold
14
24
  attr_reader :hashes
15
25
 
16
26
  def initialize(mass = 16)
@@ -20,21 +30,60 @@ class Flay
20
30
 
21
31
  def process(*files)
22
32
  files.each do |file|
23
- warn "Processing #{file}..."
33
+ warn "Processing #{file}"
24
34
 
25
- t = Time.now
26
35
  pt = RubyParser.new.process(File.read(file), file)
27
-
28
36
  next unless pt # empty files... hahaha, suck.
29
37
 
30
- t = Time.now
31
- pt.deep_each do |node|
32
- next unless node.any? { |sub| Sexp === sub }
33
- next if node.mass < @mass_threshold
38
+ process_sexp pt
39
+ end
40
+
41
+ process_fuzzy_similarities if $f
42
+ end
43
+
44
+ def process_sexp pt
45
+ pt.deep_each do |node|
46
+ next unless node.any? { |sub| Sexp === sub }
47
+ next if node.mass < self.mass_threshold
34
48
 
35
- self.hashes[node.fuzzy_hash] << node
49
+ self.hashes[node.fuzzy_hash] << node
50
+ end
51
+ end
52
+
53
+ def process_fuzzy_similarities
54
+ all_hashes, detected = {}, {}
55
+
56
+ self.hashes.values.each do |nodes|
57
+ nodes.each do |node|
58
+ next if node.mass > 4 * self.mass_threshold
59
+ # TODO: try out with fuzzy_hash
60
+ # all_hashes[node] = node.grep(Sexp).map { |s| [s.hash] * s.mass }.flatten
61
+ all_hashes[node] = node.grep(Sexp).map { |s| [s.hash] }.flatten
36
62
  end
37
63
  end
64
+
65
+ # warn "looking for copy/paste/edit code across #{all_hashes.size} nodes"
66
+
67
+ all_hashes = all_hashes.to_a
68
+ all_hashes.each_with_index do |(s1, h1), i|
69
+ similar = [s1]
70
+ all_hashes[i+1..-1].each do |(s2, h2)|
71
+ next if detected[h2]
72
+ intersection = h1.intersection h2
73
+ max = [h1.size, h2.size].max
74
+ if intersection.size >= max * 0.60 then
75
+ similarity = s1.similarity(s2)
76
+ if similarity > 0.60 then
77
+ similar << s2
78
+ detected[h2] = true
79
+ else
80
+ p [similarity, s1, s2]
81
+ end
82
+ end
83
+ end
84
+
85
+ self.hashes[similar.first.hash].push(*similar) if similar.size > 1
86
+ end
38
87
  end
39
88
 
40
89
  def prune
@@ -55,30 +104,88 @@ class Flay
55
104
  self.hashes.delete_if { |h,_| all_hashes[h] }
56
105
  end
57
106
 
107
+ def n_way_diff *data
108
+ data.each_with_index do |s, i|
109
+ c = (?A + i).chr
110
+ s.group = c
111
+ end
112
+
113
+ max = data.map { |s| s.scan(/^.*/).size }.max
114
+
115
+ data.map! { |s| # FIX: this is tarded, but I'm out of brain
116
+ c = s.group
117
+ s = s.scan(/^.*/)
118
+ s.push(*([""] * (max - s.size))) # pad
119
+ s.each do |o|
120
+ o.group = c
121
+ end
122
+ s
123
+ }
124
+
125
+ groups = data[0].zip(*data[1..-1])
126
+ groups.map! { |lines|
127
+ collapsed = lines.uniq
128
+ if collapsed.size == 1 then
129
+ " #{lines.first}"
130
+ else
131
+ # TODO: make r2r have a canonical mode (doesn't make 1-liners)
132
+ lines.reject { |l| l.empty? }.map { |l| "#{l.group}: #{l}" }
133
+ end
134
+ }
135
+ groups.flatten.join("\n")
136
+ end
137
+
58
138
  def report prune = nil
59
139
  self.prune
60
140
 
61
- self.hashes.sort_by { |_,nodes|
62
- -(nodes.first.mass * nodes.size)
63
- }.each do |_,nodes|
141
+ identical = {}
142
+ masses = {}
143
+
144
+ self.hashes.each do |hash,nodes|
145
+ identical[hash] = nodes[1..-1].all? { |n| n == nodes.first }
146
+ masses[hash] = nodes.first.mass * nodes.size
147
+ masses[hash] *= (nodes.size) if identical[hash]
148
+ end
149
+
150
+ count = 0
151
+ masses.sort_by { |h,m| [-m, hashes[h].first.file] }.each do |hash,mass|
152
+ nodes = hashes[hash]
64
153
  next unless nodes.first.first == prune if prune
65
154
  puts
66
155
 
156
+ same = identical[hash]
67
157
  node = nodes.first
68
- puts "Matches found in %p (mass = %d)" %
69
- [node.first, nodes.size * node.mass]
158
+ n = nodes.size
159
+ match, bonus = if same then
160
+ ["IDENTICAL", "*#{n}"]
161
+ else
162
+ ["Similar", ""]
163
+ end
164
+
165
+ count += 1
166
+ puts "%d) %s code found in %p (mass%s = %d)" %
167
+ [count, match, node.first, bonus, mass]
168
+
169
+ nodes.each_with_index do |node, i|
170
+ if $v then
171
+ c = (?A + i).chr
172
+ puts " #{c}: #{node.file}:#{node.line}"
173
+ else
174
+ puts " #{node.file}:#{node.line}"
175
+ end
176
+ end
70
177
 
71
- nodes.each do |node|
72
- puts " #{node.file}:#{node.line}"
178
+ if $v then
179
+ puts
180
+ r2r = Ruby2Ruby.new
181
+ puts n_way_diff(*nodes.map { |s| r2r.process(s.deep_clone) })
73
182
  end
74
183
  end
75
184
  end
76
185
  end
77
186
 
78
- class Symbol
79
- def hash
80
- @hash ||= self.to_s.hash
81
- end
187
+ class String
188
+ attr_accessor :group
82
189
  end
83
190
 
84
191
  class Sexp
@@ -108,6 +215,7 @@ class Sexp
108
215
 
109
216
  # TODO: I think this is wrong, since it isn't positional. What to do?
110
217
  l_sexp.zip(r_sexp).each do |l_sub, r_sub|
218
+ next unless l_sub && r_sub # HACK
111
219
  l2, s2, r2 = l_sub.compare_to r_sub
112
220
  l += l2
113
221
  s += s2
@@ -143,16 +251,33 @@ class Sexp
143
251
  yield sexp
144
252
  end
145
253
  end
254
+ end
146
255
 
147
- alias :old_inspect :inspect
148
- def inspect
149
- old_inspect.sub(/\)\Z/, ":h_#{self.fuzzy_hash})")
256
+ class Array
257
+ def intersection other
258
+ intersection, start = [], 0
259
+ other_size = other.length
260
+ self.each_with_index do |m, i|
261
+ (start...other_size).each do |j|
262
+ n = other.at j
263
+ if m == n then
264
+ intersection << m
265
+ start = j + 1
266
+ break
267
+ end
268
+ end
269
+ end
270
+ intersection
150
271
  end
151
272
 
152
- alias :shut_up! :pretty_print
153
- def pretty_print(q) # shows the hash TODO: remove
154
- q.group(1, 'S(', ')') do
155
- q.seplist(self + [":h_#{self.fuzzy_hash}"]) {|v| q.pp v }
273
+ def triangle # TODO: use?
274
+ max = self.size
275
+ (0...max).each do |i|
276
+ o1 = at(i)
277
+ (i+1...max).each do |j|
278
+ o2 = at(j)
279
+ yield o1, o2
280
+ end
156
281
  end
157
282
  end
158
283
  end
@@ -3,7 +3,19 @@
3
3
  require 'test/unit'
4
4
  require 'flay'
5
5
 
6
- class SexpTest < Test::Unit::TestCase
6
+ require 'pp' # TODO: remove
7
+
8
+ class Symbol # for testing only, makes the tests concrete
9
+ def hash
10
+ to_s.hash
11
+ end
12
+
13
+ def <=> o
14
+ Symbol === o && self.to_s <=> o.to_s
15
+ end
16
+ end
17
+
18
+ class TestSexp < Test::Unit::TestCase
7
19
  def setup
8
20
  # a(1) { |c| d }
9
21
  @s = s(:iter,
@@ -57,10 +69,9 @@ class SexpTest < Test::Unit::TestCase
57
69
 
58
70
  def test_all_subhashes
59
71
  expected = [-704571402, -282578980, -35395725,
60
- 160138040, 815971090, 927228382]
72
+ 160138040, 815971090, 927228382] # , 955256285]
61
73
 
62
74
  assert_equal expected, @s.all_subhashes.sort.uniq
63
- assert ! @s.all_subhashes.include?(@s.fuzzy_hash)
64
75
 
65
76
  x = []
66
77
 
@@ -71,4 +82,135 @@ class SexpTest < Test::Unit::TestCase
71
82
  assert_equal expected, x.sort.uniq
72
83
  end
73
84
 
85
+ def test_process_sexp
86
+ flay = Flay.new
87
+
88
+ s = RubyParser.new.process <<-RUBY
89
+ def x(n)
90
+ if n % 2 == 0
91
+ return n
92
+ else
93
+ return n + 1
94
+ end
95
+ end
96
+ RUBY
97
+
98
+ expected = [[:block],
99
+ # HACK [:defn],
100
+ [:scope]] # only ones big enough
101
+
102
+ flay.process_sexp s
103
+
104
+ actual = flay.hashes.values.map { |sexps| sexps.map { |sexp| sexp.first } }
105
+
106
+ assert_equal expected, actual.sort_by { |a| a.first.to_s }
107
+ end
108
+
109
+ def test_process_sexp_full
110
+ flay = Flay.new(1)
111
+
112
+ s = RubyParser.new.process <<-RUBY
113
+ def x(n)
114
+ if n % 2 == 0
115
+ return n
116
+ else
117
+ return n + 1
118
+ end
119
+ end
120
+ RUBY
121
+
122
+ expected = [[:arglist, :arglist, :arglist],
123
+ [:block],
124
+ [:call, :call],
125
+ [:call],
126
+ # HACK [:defn],
127
+ [:if],
128
+ [:return],
129
+ [:return],
130
+ [:scope]]
131
+
132
+ flay.process_sexp s
133
+
134
+ actual = flay.hashes.values.map { |sexps| sexps.map { |sexp| sexp.first } }
135
+
136
+ assert_equal expected, actual.sort_by { |a| a.first.to_s }
137
+ end
138
+
139
+ def test_process_sexp_no_structure
140
+ flay = Flay.new(1)
141
+ flay.process_sexp s(:lit, 1)
142
+
143
+ assert flay.hashes.empty?
144
+ end
145
+
146
+ def test_process_fuzzy_similarities
147
+ flay = Flay.new 7
148
+
149
+ s1 = RubyParser.new.process("def w(n); a; b; c; d; e; end")
150
+ s2 = RubyParser.new.process("def x(n); a; c; e; end")
151
+
152
+ flay.process_sexp s1
153
+ flay.process_sexp s2
154
+
155
+ flay.process_fuzzy_similarities
156
+
157
+ b1 = s1.scope.block
158
+ b2 = s2.scope.block
159
+
160
+ assert_equal [b2, b1], flay.hashes[b2.hash]
161
+ end
162
+
163
+ def test_process_fuzzy_similarities_2
164
+ flay = Flay.new 7
165
+
166
+ s1 = RubyParser.new.process("def w(n); a; b; c; d; e; end")
167
+ s2 = RubyParser.new.process("def x(n); a; c; e; end")
168
+ s3 = RubyParser.new.process("def y(n); a; f; c; g; e; end")
169
+
170
+ flay.process_sexp s1
171
+ flay.process_sexp s2
172
+ flay.process_sexp s3
173
+
174
+ flay.process_fuzzy_similarities
175
+
176
+ b1 = s1.scope.block
177
+ b2 = s2.scope.block
178
+ b3 = s3.scope.block
179
+
180
+ assert_equal [b3, b2, b1], flay.hashes[b3.hash]
181
+ end
182
+
183
+ def test_process_fuzzy_similarities_3
184
+ flay = Flay.new 7
185
+
186
+ s1 = RubyParser.new.process("def w (n); a; b; c; d; e; end")
187
+ s2 = RubyParser.new.process("def x (n); a; c; e; end")
188
+ s3 = RubyParser.new.process("def y (n); a; f; c; g; e; end")
189
+ s4 = RubyParser.new.process("def z (n); f; g; h; i; j; end")
190
+ s5 = RubyParser.new.process("def w1(n); a; b if x; c; d if y; e; end")
191
+
192
+ flay.process_sexp s1
193
+ flay.process_sexp s2
194
+ flay.process_sexp s3
195
+ flay.process_sexp s4
196
+ flay.process_sexp s5
197
+
198
+ flay.process_fuzzy_similarities
199
+
200
+ b1 = s1.scope.block
201
+ b2 = s2.scope.block
202
+ b3 = s3.scope.block
203
+ b5 = s5.scope.block
204
+
205
+ assert_equal [b3, b5, b2, b1], flay.hashes[b3.hash]
206
+ end
207
+ end
208
+
209
+ class ArrayIntersectionTests < Test::Unit::TestCase
210
+ def test_real_array_intersection
211
+ assert_equal [2], [2, 2, 2, 3, 7, 13, 49] & [2, 2, 2, 5, 11, 107]
212
+ assert_equal [2, 2, 2], [2, 2, 2, 3, 7, 13, 49].intersection([2, 2, 2, 5, 11, 107])
213
+ assert_equal ['a', 'c'], ['a', 'b', 'a', 'c'] & ['a', 'c', 'a', 'd']
214
+ assert_equal ['a', 'a'], ['a', 'b', 'a', 'c'].intersection(['a', 'c', 'a', 'd'])
215
+ end
74
216
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: flay
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Davis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-11-06 00:00:00 -05:00
12
+ date: 2009-01-20 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency