flay 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (7) hide show
  1. data/History.txt +13 -0
  2. data/README.txt +37 -13
  3. data/Rakefile +27 -0
  4. data/bin/flay +0 -2
  5. data/lib/flay.rb +153 -28
  6. data/test/test_flay.rb +145 -3
  7. metadata +2 -2
@@ -1,3 +1,16 @@
1
+ === 1.1.0 / 2009-01-20
2
+
3
+ * 8 minor enhancements:
4
+
5
+ * Added -v verbose mode to print out N-way diff of the detected code.
6
+ * Added identical node scoring and reporting.
7
+ * Added the start of copy/paste+edit detection, not even close yet
8
+ * Added more tests.
9
+ * Added rcov tasks
10
+ * Added the start of copy/paste+edit detection
11
+ * Clarified output a bit
12
+ * Refactored process_sexps to make doing other languages/systems easier.
13
+
1
14
  === 1.0.0 / 2008-11-06
2
15
 
3
16
  * 1 major enhancement
data/README.txt CHANGED
@@ -15,29 +15,53 @@ style, braces vs do/end, etc are all ignored. Making this totally rad.
15
15
  * Differences in whitespace, programming style, braces vs do/end, etc are ignored.
16
16
  * Works across files.
17
17
  * Reports differences at any level of code.
18
+ * Adds a score multiplier to identical nodes.
19
+ * Run verbose to see an N-way diff of the code.
18
20
 
19
21
  == TODO:
20
22
 
21
23
  * Editor integration (emacs, textmate, other contributions welcome).
22
- * N-way diff reporting... or... something. Not sure.
23
24
  * UI improvement suggestions welcome. :)
25
+ * Score sequence fragments (a;b;c;d;e) vs (b;c;d) etc.
24
26
 
25
27
  == SYNOPSIS:
26
28
 
27
- % flay lib/*.rb
28
- Processing unit/itemconfig.rb...
29
+ % flay -v ~/Work/svn/ruby/ruby_1_8/lib/cgi.rb
30
+ Processing /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb...
29
31
 
30
- Matches found in :when (mass = 572)
31
- unit/itemconfig.rb:343
32
- unit/itemconfig.rb:379
33
- unit/itemconfig.rb:706
34
- unit/itemconfig.rb:742
32
+ Matches found in :defn (mass = 184)
33
+ A: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:1470
34
+ B: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:1925
35
35
 
36
- Matches found in :when (mass = 500)
37
- unit/itemconfig.rb:509
38
- unit/itemconfig.rb:539
39
- unit/itemconfig.rb:875
40
- unit/itemconfig.rb:905
36
+ A: def checkbox_group(name = "", *values)
37
+ B: def radio_group(name = "", *values)
38
+ if name.kind_of?(Hash) then
39
+ values = name["VALUES"]
40
+ name = name["NAME"]
41
+ end
42
+ values.collect do |value|
43
+ if value.kind_of?(String) then
44
+ A: (checkbox(name, value) + value)
45
+ B: (radio_button(name, value) + value)
46
+ else
47
+ if (value[(value.size - 1)] == true) then
48
+ A: (checkbox(name, value[0], true) + value[(value.size - 2)])
49
+ B: (radio_button(name, value[0], true) + value[(value.size - 2)])
50
+ else
51
+ A: (checkbox(name, value[0]) + value[(value.size - 1)])
52
+ B: (radio_button(name, value[0]) + value[(value.size - 1)])
53
+ end
54
+ end
55
+ end.to_s
56
+ end
57
+
58
+ IDENTICAL Matches found in :for (mass*2 = 144)
59
+ A: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:2160
60
+ B: /Users/ryan/Work/svn/ruby/ruby_1_8/lib/cgi.rb:2217
61
+
62
+ for element in ["HTML", "BODY", "P", "DT", "DD", "LI", "OPTION", "THEAD", "TFOOT", "TBODY", "COLGROUP", "TR", "TH", "TD", "HEAD"] do
63
+ methods = (methods + ((" def #{element.downcase}(attributes = {})\n" + nO_element_def(element)) + " end\n"))
64
+ end
41
65
  ...
42
66
 
43
67
  == REQUIREMENTS:
data/Rakefile CHANGED
@@ -16,4 +16,31 @@ Hoe.new('flay', Flay::VERSION) do |flay|
16
16
  flay.extra_deps << ['ruby_parser', '>= 1.1.0']
17
17
  end
18
18
 
19
+ begin
20
+ require 'rcov/rcovtask'
21
+ Rcov::RcovTask.new do |t|
22
+ pattern = ENV['PATTERN'] || 'test/test_*.rb'
23
+
24
+ t.test_files = FileList[pattern]
25
+ t.verbose = true
26
+ t.rcov_opts << "--threshold 80"
27
+ t.rcov_opts << "--no-color"
28
+ end
29
+
30
+ task :rcov_info do
31
+ pattern = ENV['PATTERN'] || "test/test_*.rb"
32
+ ruby "-Ilib -S rcov --text-report --save coverage.info -x rcov,sexp_processor --test-unit-only #{pattern}"
33
+ end
34
+
35
+ task :rcov_overlay do
36
+ rcov, eol = Marshal.load(File.read("coverage.info")).last[ENV["FILE"]], 1
37
+ puts rcov[:lines].zip(rcov[:coverage]).map { |line, coverage|
38
+ bol, eol = eol, eol + line.length
39
+ [bol, eol, "#ffcccc"] unless coverage
40
+ }.compact.inspect
41
+ end
42
+ rescue LoadError
43
+ # skip
44
+ end
45
+
19
46
  # vim: syntax=Ruby
data/bin/flay CHANGED
@@ -1,7 +1,5 @@
1
1
  #!/usr/bin/ruby -s
2
2
 
3
- $m ||= 16
4
-
5
3
  require 'flay'
6
4
 
7
5
  flay = Flay.new($m.to_i)
@@ -1,6 +1,5 @@
1
1
  #!/usr/bin/env ruby -w
2
2
 
3
- $: << "../../sexp_processor/dev/lib" # TODO: remove
4
3
  $: << "../../ruby_parser/dev/lib"
5
4
 
6
5
  require 'rubygems'
@@ -8,9 +7,20 @@ require 'sexp_processor'
8
7
  require 'ruby_parser'
9
8
  require 'pp' # TODO: remove
10
9
 
10
+ $m ||= 16
11
+ $v ||= false
12
+ $f ||= false
13
+
14
+ if $v then
15
+ $: << "../../ruby2ruby/dev/lib"
16
+ require 'ruby2ruby'
17
+ require 'tempfile'
18
+ end
19
+
11
20
  class Flay
12
- VERSION = '1.0.0'
21
+ VERSION = '1.1.0'
13
22
 
23
+ attr_accessor :mass_threshold
14
24
  attr_reader :hashes
15
25
 
16
26
  def initialize(mass = 16)
@@ -20,21 +30,60 @@ class Flay
20
30
 
21
31
  def process(*files)
22
32
  files.each do |file|
23
- warn "Processing #{file}..."
33
+ warn "Processing #{file}"
24
34
 
25
- t = Time.now
26
35
  pt = RubyParser.new.process(File.read(file), file)
27
-
28
36
  next unless pt # empty files... hahaha, suck.
29
37
 
30
- t = Time.now
31
- pt.deep_each do |node|
32
- next unless node.any? { |sub| Sexp === sub }
33
- next if node.mass < @mass_threshold
38
+ process_sexp pt
39
+ end
40
+
41
+ process_fuzzy_similarities if $f
42
+ end
43
+
44
+ def process_sexp pt
45
+ pt.deep_each do |node|
46
+ next unless node.any? { |sub| Sexp === sub }
47
+ next if node.mass < self.mass_threshold
34
48
 
35
- self.hashes[node.fuzzy_hash] << node
49
+ self.hashes[node.fuzzy_hash] << node
50
+ end
51
+ end
52
+
53
+ def process_fuzzy_similarities
54
+ all_hashes, detected = {}, {}
55
+
56
+ self.hashes.values.each do |nodes|
57
+ nodes.each do |node|
58
+ next if node.mass > 4 * self.mass_threshold
59
+ # TODO: try out with fuzzy_hash
60
+ # all_hashes[node] = node.grep(Sexp).map { |s| [s.hash] * s.mass }.flatten
61
+ all_hashes[node] = node.grep(Sexp).map { |s| [s.hash] }.flatten
36
62
  end
37
63
  end
64
+
65
+ # warn "looking for copy/paste/edit code across #{all_hashes.size} nodes"
66
+
67
+ all_hashes = all_hashes.to_a
68
+ all_hashes.each_with_index do |(s1, h1), i|
69
+ similar = [s1]
70
+ all_hashes[i+1..-1].each do |(s2, h2)|
71
+ next if detected[h2]
72
+ intersection = h1.intersection h2
73
+ max = [h1.size, h2.size].max
74
+ if intersection.size >= max * 0.60 then
75
+ similarity = s1.similarity(s2)
76
+ if similarity > 0.60 then
77
+ similar << s2
78
+ detected[h2] = true
79
+ else
80
+ p [similarity, s1, s2]
81
+ end
82
+ end
83
+ end
84
+
85
+ self.hashes[similar.first.hash].push(*similar) if similar.size > 1
86
+ end
38
87
  end
39
88
 
40
89
  def prune
@@ -55,30 +104,88 @@ class Flay
55
104
  self.hashes.delete_if { |h,_| all_hashes[h] }
56
105
  end
57
106
 
107
+ def n_way_diff *data
108
+ data.each_with_index do |s, i|
109
+ c = (?A + i).chr
110
+ s.group = c
111
+ end
112
+
113
+ max = data.map { |s| s.scan(/^.*/).size }.max
114
+
115
+ data.map! { |s| # FIX: this is tarded, but I'm out of brain
116
+ c = s.group
117
+ s = s.scan(/^.*/)
118
+ s.push(*([""] * (max - s.size))) # pad
119
+ s.each do |o|
120
+ o.group = c
121
+ end
122
+ s
123
+ }
124
+
125
+ groups = data[0].zip(*data[1..-1])
126
+ groups.map! { |lines|
127
+ collapsed = lines.uniq
128
+ if collapsed.size == 1 then
129
+ " #{lines.first}"
130
+ else
131
+ # TODO: make r2r have a canonical mode (doesn't make 1-liners)
132
+ lines.reject { |l| l.empty? }.map { |l| "#{l.group}: #{l}" }
133
+ end
134
+ }
135
+ groups.flatten.join("\n")
136
+ end
137
+
58
138
  def report prune = nil
59
139
  self.prune
60
140
 
61
- self.hashes.sort_by { |_,nodes|
62
- -(nodes.first.mass * nodes.size)
63
- }.each do |_,nodes|
141
+ identical = {}
142
+ masses = {}
143
+
144
+ self.hashes.each do |hash,nodes|
145
+ identical[hash] = nodes[1..-1].all? { |n| n == nodes.first }
146
+ masses[hash] = nodes.first.mass * nodes.size
147
+ masses[hash] *= (nodes.size) if identical[hash]
148
+ end
149
+
150
+ count = 0
151
+ masses.sort_by { |h,m| [-m, hashes[h].first.file] }.each do |hash,mass|
152
+ nodes = hashes[hash]
64
153
  next unless nodes.first.first == prune if prune
65
154
  puts
66
155
 
156
+ same = identical[hash]
67
157
  node = nodes.first
68
- puts "Matches found in %p (mass = %d)" %
69
- [node.first, nodes.size * node.mass]
158
+ n = nodes.size
159
+ match, bonus = if same then
160
+ ["IDENTICAL", "*#{n}"]
161
+ else
162
+ ["Similar", ""]
163
+ end
164
+
165
+ count += 1
166
+ puts "%d) %s code found in %p (mass%s = %d)" %
167
+ [count, match, node.first, bonus, mass]
168
+
169
+ nodes.each_with_index do |node, i|
170
+ if $v then
171
+ c = (?A + i).chr
172
+ puts " #{c}: #{node.file}:#{node.line}"
173
+ else
174
+ puts " #{node.file}:#{node.line}"
175
+ end
176
+ end
70
177
 
71
- nodes.each do |node|
72
- puts " #{node.file}:#{node.line}"
178
+ if $v then
179
+ puts
180
+ r2r = Ruby2Ruby.new
181
+ puts n_way_diff(*nodes.map { |s| r2r.process(s.deep_clone) })
73
182
  end
74
183
  end
75
184
  end
76
185
  end
77
186
 
78
- class Symbol
79
- def hash
80
- @hash ||= self.to_s.hash
81
- end
187
+ class String
188
+ attr_accessor :group
82
189
  end
83
190
 
84
191
  class Sexp
@@ -108,6 +215,7 @@ class Sexp
108
215
 
109
216
  # TODO: I think this is wrong, since it isn't positional. What to do?
110
217
  l_sexp.zip(r_sexp).each do |l_sub, r_sub|
218
+ next unless l_sub && r_sub # HACK
111
219
  l2, s2, r2 = l_sub.compare_to r_sub
112
220
  l += l2
113
221
  s += s2
@@ -143,16 +251,33 @@ class Sexp
143
251
  yield sexp
144
252
  end
145
253
  end
254
+ end
146
255
 
147
- alias :old_inspect :inspect
148
- def inspect
149
- old_inspect.sub(/\)\Z/, ":h_#{self.fuzzy_hash})")
256
+ class Array
257
+ def intersection other
258
+ intersection, start = [], 0
259
+ other_size = other.length
260
+ self.each_with_index do |m, i|
261
+ (start...other_size).each do |j|
262
+ n = other.at j
263
+ if m == n then
264
+ intersection << m
265
+ start = j + 1
266
+ break
267
+ end
268
+ end
269
+ end
270
+ intersection
150
271
  end
151
272
 
152
- alias :shut_up! :pretty_print
153
- def pretty_print(q) # shows the hash TODO: remove
154
- q.group(1, 'S(', ')') do
155
- q.seplist(self + [":h_#{self.fuzzy_hash}"]) {|v| q.pp v }
273
+ def triangle # TODO: use?
274
+ max = self.size
275
+ (0...max).each do |i|
276
+ o1 = at(i)
277
+ (i+1...max).each do |j|
278
+ o2 = at(j)
279
+ yield o1, o2
280
+ end
156
281
  end
157
282
  end
158
283
  end
@@ -3,7 +3,19 @@
3
3
  require 'test/unit'
4
4
  require 'flay'
5
5
 
6
- class SexpTest < Test::Unit::TestCase
6
+ require 'pp' # TODO: remove
7
+
8
+ class Symbol # for testing only, makes the tests concrete
9
+ def hash
10
+ to_s.hash
11
+ end
12
+
13
+ def <=> o
14
+ Symbol === o && self.to_s <=> o.to_s
15
+ end
16
+ end
17
+
18
+ class TestSexp < Test::Unit::TestCase
7
19
  def setup
8
20
  # a(1) { |c| d }
9
21
  @s = s(:iter,
@@ -57,10 +69,9 @@ class SexpTest < Test::Unit::TestCase
57
69
 
58
70
  def test_all_subhashes
59
71
  expected = [-704571402, -282578980, -35395725,
60
- 160138040, 815971090, 927228382]
72
+ 160138040, 815971090, 927228382] # , 955256285]
61
73
 
62
74
  assert_equal expected, @s.all_subhashes.sort.uniq
63
- assert ! @s.all_subhashes.include?(@s.fuzzy_hash)
64
75
 
65
76
  x = []
66
77
 
@@ -71,4 +82,135 @@ class SexpTest < Test::Unit::TestCase
71
82
  assert_equal expected, x.sort.uniq
72
83
  end
73
84
 
85
+ def test_process_sexp
86
+ flay = Flay.new
87
+
88
+ s = RubyParser.new.process <<-RUBY
89
+ def x(n)
90
+ if n % 2 == 0
91
+ return n
92
+ else
93
+ return n + 1
94
+ end
95
+ end
96
+ RUBY
97
+
98
+ expected = [[:block],
99
+ # HACK [:defn],
100
+ [:scope]] # only ones big enough
101
+
102
+ flay.process_sexp s
103
+
104
+ actual = flay.hashes.values.map { |sexps| sexps.map { |sexp| sexp.first } }
105
+
106
+ assert_equal expected, actual.sort_by { |a| a.first.to_s }
107
+ end
108
+
109
+ def test_process_sexp_full
110
+ flay = Flay.new(1)
111
+
112
+ s = RubyParser.new.process <<-RUBY
113
+ def x(n)
114
+ if n % 2 == 0
115
+ return n
116
+ else
117
+ return n + 1
118
+ end
119
+ end
120
+ RUBY
121
+
122
+ expected = [[:arglist, :arglist, :arglist],
123
+ [:block],
124
+ [:call, :call],
125
+ [:call],
126
+ # HACK [:defn],
127
+ [:if],
128
+ [:return],
129
+ [:return],
130
+ [:scope]]
131
+
132
+ flay.process_sexp s
133
+
134
+ actual = flay.hashes.values.map { |sexps| sexps.map { |sexp| sexp.first } }
135
+
136
+ assert_equal expected, actual.sort_by { |a| a.first.to_s }
137
+ end
138
+
139
+ def test_process_sexp_no_structure
140
+ flay = Flay.new(1)
141
+ flay.process_sexp s(:lit, 1)
142
+
143
+ assert flay.hashes.empty?
144
+ end
145
+
146
+ def test_process_fuzzy_similarities
147
+ flay = Flay.new 7
148
+
149
+ s1 = RubyParser.new.process("def w(n); a; b; c; d; e; end")
150
+ s2 = RubyParser.new.process("def x(n); a; c; e; end")
151
+
152
+ flay.process_sexp s1
153
+ flay.process_sexp s2
154
+
155
+ flay.process_fuzzy_similarities
156
+
157
+ b1 = s1.scope.block
158
+ b2 = s2.scope.block
159
+
160
+ assert_equal [b2, b1], flay.hashes[b2.hash]
161
+ end
162
+
163
+ def test_process_fuzzy_similarities_2
164
+ flay = Flay.new 7
165
+
166
+ s1 = RubyParser.new.process("def w(n); a; b; c; d; e; end")
167
+ s2 = RubyParser.new.process("def x(n); a; c; e; end")
168
+ s3 = RubyParser.new.process("def y(n); a; f; c; g; e; end")
169
+
170
+ flay.process_sexp s1
171
+ flay.process_sexp s2
172
+ flay.process_sexp s3
173
+
174
+ flay.process_fuzzy_similarities
175
+
176
+ b1 = s1.scope.block
177
+ b2 = s2.scope.block
178
+ b3 = s3.scope.block
179
+
180
+ assert_equal [b3, b2, b1], flay.hashes[b3.hash]
181
+ end
182
+
183
+ def test_process_fuzzy_similarities_3
184
+ flay = Flay.new 7
185
+
186
+ s1 = RubyParser.new.process("def w (n); a; b; c; d; e; end")
187
+ s2 = RubyParser.new.process("def x (n); a; c; e; end")
188
+ s3 = RubyParser.new.process("def y (n); a; f; c; g; e; end")
189
+ s4 = RubyParser.new.process("def z (n); f; g; h; i; j; end")
190
+ s5 = RubyParser.new.process("def w1(n); a; b if x; c; d if y; e; end")
191
+
192
+ flay.process_sexp s1
193
+ flay.process_sexp s2
194
+ flay.process_sexp s3
195
+ flay.process_sexp s4
196
+ flay.process_sexp s5
197
+
198
+ flay.process_fuzzy_similarities
199
+
200
+ b1 = s1.scope.block
201
+ b2 = s2.scope.block
202
+ b3 = s3.scope.block
203
+ b5 = s5.scope.block
204
+
205
+ assert_equal [b3, b5, b2, b1], flay.hashes[b3.hash]
206
+ end
207
+ end
208
+
209
+ class ArrayIntersectionTests < Test::Unit::TestCase
210
+ def test_real_array_intersection
211
+ assert_equal [2], [2, 2, 2, 3, 7, 13, 49] & [2, 2, 2, 5, 11, 107]
212
+ assert_equal [2, 2, 2], [2, 2, 2, 3, 7, 13, 49].intersection([2, 2, 2, 5, 11, 107])
213
+ assert_equal ['a', 'c'], ['a', 'b', 'a', 'c'] & ['a', 'c', 'a', 'd']
214
+ assert_equal ['a', 'a'], ['a', 'b', 'a', 'c'].intersection(['a', 'c', 'a', 'd'])
215
+ end
74
216
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: flay
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Davis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-11-06 00:00:00 -05:00
12
+ date: 2009-01-20 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency