doppelganger 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,105 @@
1
+ require "#{Doppelganger::LIBPATH}doppelganger/unified_ruby"
2
+
3
# Record for an extracted method definition: its name, argument list and body
# sexps, the rebuilt :defn node, where it was found (filename, first and last
# line) and the flattened, literal-free body used for diffing.
MethodDef = Struct.new(
  :name, :args, :body, :node,
  :filename, :line,
  :flat_body_array, :last_line
)

# Record for an extracted :block node; same bookkeeping fields as MethodDef
# minus the name and argument list.
BlockNode = Struct.new(
  :body, :node,
  :filename, :line,
  :flat_body_array, :last_line
)

# Record for an extracted :iter node: the call being iterated, the block
# argument assignment, the body, plus the shared bookkeeping fields.
IterNode = Struct.new(
  :call_node, :asgn_node, :body, :node,
  :filename, :line,
  :flat_body_array, :last_line
)
6
+
7
module Doppelganger
  # Parses Ruby source into Sexps and, while processing them, records every
  # method definition (:defn), :block and :iter node it encounters as a
  # MethodDef, BlockNode or IterNode. The collected records are exposed through
  # +sexp_blocks+ (the list NodeAnalysis.new takes as its argument).
  class Extractor < SexpProcessor
    include UnifiedRuby

    attr_reader :sexp_blocks, :dir

    def initialize
      super
      self.auto_shift_type = true
      @sexp_blocks = []
      @rp = RubyParser.new
    end

    # Parses +dir+ and processes the resulting Sexps. +dir+ may be a directory,
    # in which case every "*.rb" file beneath it is parsed, or a single file.
    # Anything else is ignored. Returns the accumulated list of extracted
    # records.
    def extract_blocks(dir)
      @dir = File.expand_path(dir)

      if File.directory?(@dir)
        ruby_files = Dir["#{self.dir}/**/*.rb"]
        Find.find(*ruby_files) do |path|
          next unless File.file?(path)

          self.process(@rp.process(File.read(path), path))
        end
      elsif File.file?(@dir)
        self.process(@rp.process(File.read(@dir), @dir))
      end

      @sexp_blocks
    end

    # Handles a :defn node. The pieces are shifted off +exp+ in order (name,
    # args, body); the last line is read after name and args are removed but
    # while the body is still in +exp+. Definitions whose body is just
    # s(:scope, s(:block, s(:nil))) are not recorded. Returns the rebuilt
    # :defn node.
    def process_defn(exp)
      definition = MethodDef.new
      definition.name      = exp.shift
      definition.args      = process(exp.shift)
      definition.last_line = exp.last_line_number
      definition.body      = process(exp.shift)
      definition.node      = s(:defn, definition.name, definition.args, definition.body.dup)
      definition.flat_body_array = definition.body.dup.remove_literals.to_flat_ary
      definition.filename  = exp.file
      definition.line      = exp.line

      empty_body = s(:scope, s(:block, s(:nil)))
      @sexp_blocks << definition unless definition.body == empty_body

      definition.node
    end

    # Handles a :block node. With more than one statement each statement is
    # processed and collected; otherwise the single element is taken as the
    # body without being processed. Blocks whose body is s(:nil) are not
    # recorded. Returns the rebuilt :block node.
    #
    # NOTE(review): if +exp+ is empty here the body is nil and the calls on it
    # below would fail -- confirm whether the parser can emit an empty :block.
    def process_block(exp)
      record = BlockNode.new
      record.last_line = exp.last_line_number

      if exp.size > 1
        record.body = s()
        record.body << process(exp.shift) until exp.empty?
        record.node = s(:block, *record.body.dup)
      else
        record.body = exp.shift
        record.node = s(:block, record.body.dup)
      end

      record.flat_body_array = record.body.dup.remove_literals.to_flat_ary
      record.filename = exp.file
      record.line     = exp.line

      @sexp_blocks << record unless record.body == s(:nil)

      record.node
    end

    # Handles an :iter node. When the iterator body is a :block node the iter
    # is only rebuilt (processing the body handles that :block); otherwise the
    # iter itself is recorded as an IterNode. Returns the rebuilt :iter node
    # either way.
    #
    # NOTE(review): exp[2] is indexed without a nil check -- confirm the parser
    # always supplies a body element for :iter.
    def process_iter(exp)
      if exp[2][0] == :block
        call_part = process(exp.shift)
        asgn_part = process(exp.shift)
        body_part = process(exp.shift)
        return s(:iter, call_part, asgn_part, body_part)
      end

      record = IterNode.new
      record.call_node = process(exp.shift)
      record.asgn_node = process(exp.shift)
      record.last_line = exp.last_line_number
      record.body      = process(exp.shift)
      record.node      = s(:iter, record.call_node, record.asgn_node, record.body.dup)
      record.flat_body_array = record.body.dup.remove_literals.to_flat_ary
      record.filename  = exp.file
      record.line      = exp.line

      @sexp_blocks << record
      record.node
    end

  end
end
@@ -0,0 +1,23 @@
1
class Array
  # Reports whether +element+ occurs more than once in this array
  # (uses <tt>==</tt> for comparison).
  def duplicates?(element)
    count { |elem| elem == element } > 1
  end

  # Yields every ordered pair of elements that differ. When +compare_method+
  # is given, two elements are considered the same if sending that method to
  # each returns equal values; when it is omitted the elements themselves are
  # compared with <tt>==</tt>. Pairs considered the same are skipped.
  def stepwise(compare_method = nil) #:nodoc:
    each do |element1|
      each do |element2|
        same =
          if compare_method
            element1.send(compare_method) == element2.send(compare_method)
          else
            element1 == element2
          end
        next if same
        yield element1, element2
      end
    end
  end

  # Collects the first element of each differing pair for which the block
  # returns a truthy value; nils and repeats are removed from the result.
  def comparing_collect #:nodoc:
    accumulator = [] # collect implementation copied from Rubinius
    # stepwise is called without a selector, so pairs are filtered on the
    # elements themselves.
    stepwise do |element1, element2|
      accumulator << element1 if yield(element1, element2)
    end
    accumulator.compact.uniq
  end
end
@@ -0,0 +1,89 @@
1
+ # This is pulled in part from Ryan Davis' Sexp additions in Flay.
2
+
3
class Sexp
  # Calls the block with every Sexp nested anywhere inside this one, visiting
  # each child before that child's own descendants.
  def deep_each(&block)
    each_sexp do |child|
      block.call(child)
      child.deep_each(&block)
    end
  end

  # Returns the line of the last nested Sexp visited by deep_each that
  # responds to +line+, or nil when there is none.
  def last_line_number
    last_seen = nil
    deep_each do |child|
      last_seen = child.line if child.respond_to?(:line)
    end
    last_seen
  end

  # Builds a new Sexp from this one: non-Sexp elements are copied across as
  # they are, Sexp elements are replaced by the block's result for them.
  def map_sexps
    each_with_object(s()) do |node, mapped|
      mapped << (Sexp === node ? yield(node) : node)
    end
  end

  # Drops every element for which the block is true, at this level and,
  # recursively, inside every remaining child Sexp.
  def deep_reject(&block)
    kept = self.reject { |node| block.call(node) }
    kept.map_sexps { |child| child.deep_reject(&block) }
  end

  # Strips everything that is neither a Symbol nor a Sexp. Symbols are kept
  # because Sexp uses them as node names, which carry the structure that
  # matters when comparing.
  def remove_literals
    deep_reject do |node|
      case node
      when Symbol, Sexp then false
      else true
      end
    end
  end

  # Yields each direct child that is itself a Sexp.
  def each_sexp
    self.each do |node|
      yield node if Sexp === node
    end
  end

  # True when the block is true for any Sexp nested anywhere inside this one.
  def deep_any?(&block)
    any_sexp? do |child|
      block.call(child) || child.deep_any?(&block)
    end
  end

  # True when the block is true for any direct child that is a Sexp;
  # non-Sexp children never match.
  def any_sexp?
    self.any? do |node|
      Sexp === node && yield(node)
    end
  end

  # True when a Sexp equal to +block_node+ is nested somewhere inside this one.
  def contains_block?(block_node)
    deep_any? { |child| child == block_node }
  end

  # Converts the Sexp to an Array and flattens it.
  def to_flat_ary
    to_a.flatten
  end
end
@@ -0,0 +1,121 @@
1
module Doppelganger
  # Compares the extracted block-like records (anything exposing +body+,
  # +node+, +flat_body_array+, +filename+, +line+ and +last_line+) against one
  # another to find exact duplicates and near-duplicates.
  class NodeAnalysis

    attr_accessor :sexp_blocks

    def initialize(sexp_blocks)
      @sexp_blocks = sexp_blocks
    end

    # True when at least one group of duplicated blocks exists.
    def duplication?
      !duplicates.empty?
    end

    # Groups together blocks whose bodies are equal once literals are removed.
    # Returns an array of groups (each an array of records); a block with no
    # twin does not appear.
    def duplicates
      stripped_bodies = @sexp_blocks.map { |sblock| sblock.body.remove_literals }
      groups = []

      @sexp_blocks.each do |sblock|
        stripped = sblock.body.remove_literals
        next unless stripped_bodies.duplicates?(stripped)

        # Join the group started by an earlier twin, or start a new one.
        existing = groups.find { |group| group.first.body.remove_literals == stripped }
        if existing
          existing << sblock
        else
          groups << [sblock]
        end
      end

      groups.compact.uniq
    end

    # Returns pairs of blocks whose flattened bodies differ by at most
    # +threshold+ change sets (as counted by Diff::LCS.diff) and that are not
    # exact duplicates. +progress_bar+, when given, receives +inc+ once per
    # pair considered.
    def diff(threshold, progress_bar = nil)
      close_pairs = []
      @compared_node_pairs = []

      stepwise_sblocks(progress_bar) do |left, right|
        change_sets = Diff::LCS.diff(left.flat_body_array, right.flat_body_array)
        close_pairs << [left, right] if threshold >= change_sets.size
        @compared_node_pairs << [left, right]
      end

      @compared_node_pairs = []
      cleanup_descendant_duplicate_matches(close_pairs)
    end

    # Returns pairs of blocks that differ by at most +percentage+ percent and
    # are not exact duplicates.
    #
    # The percentage is the total number of differences (the diff, flattened)
    # over the total number of nodes (both flattened bodies added together).
    # The alternative would be change sets over the average node count; the
    # former is what is implemented.
    def percent_diff(percentage, progress_bar = nil)
      close_pairs = []
      @compared_node_pairs = []

      stepwise_sblocks(progress_bar) do |left, right|
        node_total = left.flat_body_array.size + right.flat_body_array.size
        difference_total = Diff::LCS.diff(left.flat_body_array, right.flat_body_array).flatten.size
        difference_percent = difference_total.to_f / node_total.to_f * 100
        close_pairs << [left, right] if percentage >= difference_percent
        @compared_node_pairs << [left, right]
      end

      @compared_node_pairs = []
      cleanup_descendant_duplicate_matches(close_pairs)
    end

    protected

    # Yields every ordered pair of records that has not been compared yet, is
    # not an exact duplicate, and where neither record contains the other.
    def stepwise_sblocks(progress_bar = nil)
      @sexp_blocks.dup.each do |first_node|
        @sexp_blocks.dup.each do |second_node|
          progress_bar.inc unless progress_bar.nil?

          skip = nodes_compared?(first_node, second_node) ||
                 first_node.body.remove_literals == second_node.body.remove_literals ||
                 one_node_is_child_of_the_other?(first_node, second_node)
          next if skip

          yield first_node, second_node
        end
      end
    end

    # True when both records come from the same file and +block_node+ either
    # starts within +element+'s line span (extended by one line) or its sexp
    # is nested inside +element+'s sexp.
    def node_includes_block?(element, block_node)
      (element.filename == block_node.filename) &&
        ((element.line..(element.last_line + 1)).include?(block_node.line) ||
         element.node.contains_block?(block_node.node))
    end

    # Removes pairs that are already covered by another pair in the results.
    def cleanup_descendant_duplicate_matches(diff_nodes)
      diff_nodes.reject { |pair| ancestor_pair_in_results?(pair, diff_nodes) }
    end

    # True when more than one pair in +results+ (the pair itself may be one of
    # them) has members that include both members of +pair+.
    def ancestor_pair_in_results?(pair, results)
      covering = results.count do |candidate|
        candidate.any? { |member| node_includes_block?(member, pair.first) } &&
          candidate.any? { |member| node_includes_block?(member, pair.last) }
      end
      covering > 1
    end

    # True when one record contains the other. For a MethodDef/BlockNode pair
    # only the method is tested as the container; otherwise both directions
    # are tried.
    def one_node_is_child_of_the_other?(node1, node2)
      return node_includes_block?(node1, node2) if node1.is_a?(MethodDef) && node2.is_a?(BlockNode)
      return node_includes_block?(node2, node1) if node1.is_a?(BlockNode) && node2.is_a?(MethodDef)

      node_includes_block?(node1, node2) || node_includes_block?(node2, node1)
    end

    # True when this pair, in either order, was already recorded during the
    # current comparison run.
    def nodes_compared?(node1, node2)
      @compared_node_pairs.any? do |pair|
        seen_nodes = pair.map(&:node)
        [node1.node, node2.node].all? { |candidate| seen_nodes.include?(candidate) }
      end
    end
  end
end
@@ -0,0 +1,358 @@
1
+ # This is copied from ParseTree but with some of the "raise" statements
2
+ # removed. Also the rewrite_masgn method has been removed because of
3
+ # many places where masgn is used that doesn't meet the requirements
4
+ # of the first if/raise line.
5
+
6
+ $TESTING ||= false
7
+
8
# Mixin of +rewrite_*+ hooks (plus a +process+ wrapper) that reshape parser
# output into one unified Sexp form. It expects the including class to supply
# +s+, +context+ and a +process+ to call via +super+.
module UnifiedRuby
  # Converts plain arrays to Sexps (nil is left alone) before delegating to
  # the including class's +process+.
  def process exp
    exp = Sexp.from_array exp unless Sexp === exp or exp.nil?
    super
  end

  # Appends the concatenated value to the argument array as a :splat,
  # wrapping the first operand in an :array node when it is not one already.
  def rewrite_argscat exp
    _, ary, val = exp
    ary = s(:array, ary) unless ary.first == :array
    ary << s(:splat, val)
  end

  # Retags :argspush as :arglist.
  def rewrite_argspush exp
    exp[0] = :arglist
    exp
  end

  # Ensures an :attrasgn ends with an :arglist: a trailing :array Sexp is
  # retagged, and a non-Sexp tail gets an empty :arglist appended.
  def rewrite_attrasgn(exp)
    last = exp.last

    if Sexp === last then
      last[0] = :arglist if last[0] == :array
    else
      exp << s(:arglist)
    end

    exp
  end

  # Unwraps a :begin that holds a single element; longer ones are returned
  # untouched.
  def rewrite_begin(exp)
    if exp.size > 2
      exp
    else
      exp.last
    end
  end

  # For the three-element form, moves the block argument into the receiver
  # (:super) or into the receiver's last element (:call) and returns the
  # receiver. Any other shape is returned unchanged.
  def rewrite_block_pass exp
    if exp.size == 3 then
      _, block, recv = exp
      case recv.first
      when :super then
        recv << s(:block_pass, block)
        exp = recv
      when :call then
        recv.last << s(:block_pass, block)
        exp = recv
      else
        exp
      end
    end

    exp
  end

  # Turns a :bmethod into s(:scope, s(:block, s(:args, ...), ...)).
  # Raises RuntimeError for an argument Sexp that is neither :lasgn nor
  # :splat.
  def rewrite_bmethod(exp)
    _, args, body = exp

    args ||= s(:array)
    body ||= s(:block)

    args = s(:args, args) unless args[0] == :array

    args = args[1] if args[1] && args[1][0] == :masgn # TODO: clean up
    args = args[1] if args[1] && args[1][0] == :array
    args[0] = :args

    # this is ugly because rewriters are depth first.
    # TODO: maybe we could come up with some way to do both forms of rewriting.
    args.map! { |s|
      if Sexp === s
        case s[0]
        when :lasgn then
          s[1]
        when :splat then
          :"*#{s[1][1]}"
        else
          raise "huh?: #{s.inspect}"
        end
      else
        s
      end
    }

    body = s(:block, body) unless body[0] == :block
    body.insert 1, args

    s(:scope, body)
  end

  # Normalises the trailing argument list of a :call to an :arglist.
  # A nil tail is popped and replaced by an empty :arglist; :array/:arglist
  # tails are retagged; :argscat/:splat tails are wrapped. Raises
  # RuntimeError for any other Array tail. A tail that is neither nil nor an
  # Array gets an empty :arglist appended after it.
  def rewrite_call(exp)
    args = exp.last
    case args
    when nil
      exp.pop
    when Array
      case args.first
      when :array, :arglist then
        args[0] = :arglist
      when :argscat, :splat then
        exp[-1] = s(:arglist, args)
      else
        raise "unknown type in call #{args.first.inspect} in #{exp.inspect}"
      end
      return exp
    end

    exp << s(:arglist)

    exp
  end

  # Retags :dasgn (and, through the alias below, :dasgn_curr) as :lasgn.
  def rewrite_dasgn(exp)
    exp[0] = :lasgn
    exp
  end

  alias :rewrite_dasgn_curr :rewrite_dasgn

  ##
  # :defn is one of the most complex of all the ASTs in ruby. We do
  # one of 3 different translations:
  #
  # 1) From:
  #
  #   s(:defn, :name, s(:scope, s(:block, s(:args, ...), ...)))
  #   s(:defn, :name, s(:bmethod, s(:masgn, s(:dasgn_curr, :args)), s(:block, ...)))
  #   s(:defn, :name, s(:fbody, s(:bmethod, s(:masgn, s(:dasgn_curr, :splat)), s(:block, ...))))
  #
  # to:
  #
  #   s(:defn, :name, s(:args, ...), s(:scope, s:(block, ...)))
  #
  # 2) From:
  #
  #   s(:defn, :writer=, s(:attrset, :@name))
  #
  # to:
  #
  #   s(:defn, :writer=, s(:args), s(:attrset, :@name))
  #
  # 3) From:
  #
  #   s(:defn, :reader, s(:ivar, :@name))
  #
  # to:
  #
  #   s(:defn, :reader, s(:args), s(:ivar, :@name))
  #
  # Raises RuntimeError if the node was classified as one of the special
  # forms but none of the known special forms matches when patching it up.

  def rewrite_defn(exp)
    weirdo = exp.ivar || exp.attrset
    fbody  = exp.fbody(true)

    weirdo ||= fbody.cfunc if fbody

    exp.push(fbody.scope) if fbody unless weirdo

    args = exp.scope.block.args(true) unless weirdo
    exp.insert 2, args if args

    # move block_arg up and in
    block_arg = exp.scope.block.block_arg(true) rescue nil
    if block_arg
      block = args.block(true)
      args << :"&#{block_arg.last}"
      args << block if block
    end

    # patch up attr_accessor methods
    if weirdo then
      case
      when fbody && fbody.cfunc then
        exp.insert 2, s(:args, :"*args")
      when exp.ivar then
        exp.insert 2, s(:args)
      when exp.attrset then
        exp.insert 2, s(:args, :arg)
      else
        # Interpolate the actual local; the previous misspelled name raised
        # NameError here instead of this RuntimeError.
        raise "unknown wierdo: #{weirdo.inspect}"
      end
    end

    exp
  end

  # Rewrites a :defs by temporarily removing the receiver, running the :defn
  # rewrite on the remainder, then putting the receiver back at index 1.
  def rewrite_defs(exp)
    receiver = exp.delete_at 1

    # TODO: I think this would be better as rewrite_scope, but that breaks others
    exp = s(exp.shift, exp.shift,
            s(:scope,
              s(:block, exp.scope.args))) if exp.scope && exp.scope.args

    result = rewrite_defn(exp)
    result.insert 1, receiver

    result
  end

  # Discards the node type and the dmethod name and returns the third
  # element.
  def rewrite_dmethod(exp)
    exp.shift # type
    exp.shift # dmethod name
    exp.shift # scope / block / body
  end

  # Retags :dvar as :lvar.
  def rewrite_dvar(exp)
    exp[0] = :lvar
    exp
  end

  # Converts an :fcall into a :call with a nil receiver, then applies the
  # :call rewrite.
  def rewrite_fcall(exp)
    exp[0] = :call
    exp.insert 1, nil

    rewrite_call(exp)
  end

  # Retags the index list of an :op_asgn1 as :arglist.
  def rewrite_op_asgn1(exp)
    exp[2][0] = :arglist # if exp[2][0] == :array
    exp
  end

  # Normalises a :resbody: guarantees an exception list, moves a leading
  # assignment of $! from the body into that list, and pads a missing body
  # with nil.
  def rewrite_resbody(exp)
    exp[1] ||= s(:array)        # no args

    body = exp[2]
    if body then
      case body.first
      when :lasgn, :iasgn then
        exp[1] << exp.delete_at(2) if body[-1] == s(:gvar, :$!)
      when :block then
        exp[1] << body.delete_at(1) if [:lasgn, :iasgn].include?(body[1][0]) &&
          body[1][-1] == s(:gvar, :$!)
      end
    end

    exp << nil if exp.size == 2 # no body

    exp
  end

  # Rebuilds a :rescue as s(:rescue, body, *resbodies[, else]) with nils
  # removed. Nested resbodies are pulled out into a flat list, and a
  # one-statement :block body inside a resbody is unwrapped.
  def rewrite_rescue(exp)
    # SKETCHY HACK return exp if exp.size > 4
    ignored = exp.shift
    body    = exp.shift unless exp.first.first == :resbody
    resbody = exp.shift
    els     = exp.shift unless exp.first.first == :resbody unless exp.empty?
    rest    = exp.empty? ? nil : exp # graceful re-rewriting (see rewrite_begin)

    resbodies = []

    unless rest then
      while resbody do
        resbodies << resbody
        resbody = resbody.resbody(true)
      end

      resbodies.each do |resbody|
        if resbody[2] && resbody[2][0] == :block && resbody[2].size == 2 then
          resbody[2] = resbody[2][-1]
        end
      end
    else
      resbodies = [resbody] + rest
    end

    resbodies << els if els

    s(:rescue, body, *resbodies).compact
  end

  # Wraps a :splat in an :array unless the first entry of +context+ is one of
  # the node types that take a bare splat.
  def rewrite_splat(exp)
    good = [:arglist, :argspush, :array, :svalue, :yield, :super].include? context.first
    exp = s(:array, exp) unless good
    exp
  end

  # Flattens a sole :array argument of :super into direct arguments; a
  # super whose structure starts with an :array holding a :splat is left
  # as it is.
  def rewrite_super(exp)
    return exp if exp.structure.flatten.first(3) == [:super, :array, :splat]
    exp.push(*exp.pop[1..-1]) if exp.size == 2 && exp.last.first == :array
    exp
  end

  # Converts a :vcall into a :call by appending a nil argument list and
  # reusing the :fcall rewrite.
  def rewrite_vcall(exp)
    exp.push nil
    rewrite_fcall(exp)
  end

  # Normalises :yield arguments. A third element is popped and treated as a
  # flag: when truthy the argument is kept as (or wrapped into) an :array,
  # otherwise an :array argument is flattened into direct arguments.
  def rewrite_yield(exp)
    real_array = exp.pop if exp.size == 3

    if exp.size == 2 then
      if real_array then
        exp[-1] = s(:array, exp[-1]) if exp[-1][0] != :array
      else
        exp.push(*exp.pop[1..-1]) if exp.last.first == :array
      end
    end

    exp
  end

  # Retags :zarray as :array.
  def rewrite_zarray(exp)
    exp[0] = :array
    exp
  end
end
316
+
317
# Sexp processor holding the :call / :fcall argument-list rewrites.
class PreUnifier < SexpProcessor
  # Removes :newline from the list of unsupported node types.
  def initialize
    super
    @unsupported.delete_if { |node_type| node_type == :newline }
  end

  # Guarantees a :call ends with an :arglist: one is appended when the node
  # has fewer than four elements, and a trailing :array is retagged.
  def rewrite_call exp
    exp << s(:arglist) if exp.size < 4
    tail = exp.last
    tail[0] = :arglist if tail.first == :array
    exp
  end

  # Guarantees an :fcall ends with an :arglist: one is appended when the node
  # has fewer than three elements. A trailing :array that contains a :splat
  # is wrapped in a new :arglist; one without is retagged in place.
  def rewrite_fcall exp
    exp << s(:arglist) if exp.size < 3

    args = exp[-1]
    if args[0] == :array
      if args.any? { |arg| Array === arg && arg.first == :splat }
        exp[-1] = s(:arglist, args)
      else
        args[0] = :arglist
      end
    end

    exp
  end
end
339
+
340
# Sexp processor that applies the UnifiedRuby rewrites.
class PostUnifier < SexpProcessor
  include UnifiedRuby

  # Removes :newline from the list of unsupported node types.
  def initialize
    super
    @unsupported.delete_if { |node_type| node_type == :newline }
  end
end
348
+
349
##
# Composite processor that unifies the sexp structure: a PreUnifier followed
# by a PostUnifier, added in that order.

class Unifier < CompositeSexpProcessor
  def initialize
    super
    [PreUnifier, PostUnifier].each { |stage| self << stage.new }
  end
end