doppelganger 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,105 @@
1
+ require "#{Doppelganger::LIBPATH}doppelganger/unified_ruby"
2
+
3
# Record for an extracted method definition (filled in by
# Doppelganger::Extractor#process_defn).
MethodDef = Struct.new(
  :name, :args, :body, :node,
  :filename, :line, :flat_body_array, :last_line
)

# Record for an extracted :block node (filled in by
# Doppelganger::Extractor#process_block).
BlockNode = Struct.new(
  :body, :node,
  :filename, :line, :flat_body_array, :last_line
)

# Record for an extracted :iter node (filled in by
# Doppelganger::Extractor#process_iter).
IterNode = Struct.new(
  :call_node, :asgn_node, :body, :node,
  :filename, :line, :flat_body_array, :last_line
)
6
+
7
module Doppelganger
  # Goes through all the Ruby files in a directory (or a single file) and
  # parses them into Sexps with RubyParser. It then extracts the block-like
  # nodes (method definitions, blocks and iterators) and stores them in
  # +sexp_blocks+ as MethodDef, BlockNode and IterNode records so that they
  # can be compared later on.
  class Extractor < SexpProcessor
    include UnifiedRuby

    # +sexp_blocks+ is the list of records collected so far; +dir+ is the
    # expanded path given to the most recent #extract_blocks call.
    attr_reader :sexp_blocks, :dir

    def initialize
      super
      # The process_* handlers below expect the node type to be removed
      # already (process_defn shifts the method name first).
      self.auto_shift_type = true
      @rp = RubyParser.new
      @sexp_blocks = []
    end

    # Parses every <tt>*.rb</tt> file below +dir+ (or +dir+ itself when it is
    # a plain file) and extracts all the block-like nodes.
    #
    # Returns the accumulated +sexp_blocks+ array. Records from earlier calls
    # are kept; a path that is neither a directory nor a file adds nothing.
    def extract_blocks(dir)
      @dir = File.expand_path(dir)
      if File.directory?(@dir)
        # Glob results are used directly: walking them again would descend
        # into directories that happen to be named "*.rb" and parse their
        # contents a second time. Sorting keeps the order stable.
        Dir["#{@dir}/**/*.rb"].sort.each do |filename|
          extract_file(filename) if File.file?(filename)
        end
      elsif File.file?(@dir)
        extract_file(@dir)
      end
      @sexp_blocks
    end

    # Handler for :defn nodes. Records the method as a MethodDef unless its
    # body is the empty-method shape s(:scope, s(:block, s(:nil))).
    #
    # Returns the rebuilt s(:defn, name, args, body) node.
    def process_defn(exp)
      definition = MethodDef.new
      definition.name = exp.shift
      definition.args = process(exp.shift)
      # Taken before the body is shifted off, while exp still holds it.
      definition.last_line = exp.last_line_number
      definition.body = process(exp.shift)
      definition.node = s(:defn, definition.name, definition.args, definition.body.dup)
      definition.flat_body_array = definition.body.dup.remove_literals.to_flat_ary
      definition.filename = exp.file
      definition.line = exp.line

      unless definition.body == s(:scope, s(:block, s(:nil)))
        @sexp_blocks << definition
      end
      definition.node
    end

    # Handler for :block nodes. Records the block as a BlockNode unless its
    # body is s(:nil).
    #
    # Returns the rebuilt s(:block, ...) node; an empty block yields
    # s(:block) and is not recorded.
    def process_block(exp)
      block_node = BlockNode.new
      block_node.last_line = exp.last_line_number
      if exp.size > 1
        block_node.body = s()
        block_node.body << process(exp.shift) until exp.empty?
        block_node.node = s(:block, *block_node.body.dup)
      else
        # NOTE(review): a single statement is stored without being passed
        # through #process, so nodes nested inside it are not extracted.
        # Kept as is; confirm whether that is intended.
        block_node.body = exp.shift
        # Nothing to record or compare for a block without statements.
        return s(:block) if block_node.body.nil?
        block_node.node = s(:block, block_node.body.dup)
      end

      block_node.flat_body_array = block_node.body.dup.remove_literals.to_flat_ary
      block_node.filename = exp.file
      block_node.line = exp.line

      unless block_node.body == s(:nil)
        @sexp_blocks << block_node
      end
      block_node.node
    end

    # Handler for :iter nodes. An iterator is recorded as an IterNode only
    # when it has a body that is not a :block (a :block body is recorded by
    # #process_block instead); an iterator without a body is passed through.
    #
    # Returns the rebuilt s(:iter, call, asgn, body) node.
    def process_iter(exp)
      raw_body = exp[2]
      extractable = !raw_body.nil? && raw_body[0] != :block

      call_node = process(exp.shift)
      asgn_node = process(exp.shift)
      # Taken before the body is shifted off, while exp still holds it.
      last_line = exp.last_line_number if extractable
      body = process(exp.shift)

      return s(:iter, call_node, asgn_node, body) unless extractable

      iter_node = IterNode.new
      iter_node.call_node = call_node
      iter_node.asgn_node = asgn_node
      iter_node.last_line = last_line
      iter_node.body = body
      iter_node.node = s(:iter, call_node, asgn_node, body.dup)
      iter_node.flat_body_array = body.dup.remove_literals.to_flat_ary
      iter_node.filename = exp.file
      iter_node.line = exp.line

      @sexp_blocks << iter_node
      iter_node.node
    end

    private

    # Parses one file and runs the resulting Sexp through the processor.
    def extract_file(filename)
      sexp = @rp.process(File.read(filename), filename)
      process(sexp)
    end
  end
end
@@ -0,0 +1,23 @@
1
class Array
  # Returns true when +element+ occurs more than once in this array
  # (uses <tt>==</tt> for comparison).
  def duplicates?(element)
    count(element) > 1
  end

  # Yields every ordered pair of elements whose comparison keys differ.
  #
  # compare_method:: name of a method sent to both elements to obtain the
  #                  comparison key; when nil the elements themselves are
  #                  compared with <tt>==</tt>.
  #
  # Returns an Enumerator when no block is given.
  def stepwise(compare_method = nil) #:nodoc:
    return enum_for(:stepwise, compare_method) unless block_given?

    each do |element1|
      key1 = compare_method ? element1.send(compare_method) : element1
      each do |element2|
        key2 = compare_method ? element2.send(compare_method) : element2
        next if key1 == key2
        yield element1, element2
      end
    end
  end

  # Collects the first element of every pair (see #stepwise) for which the
  # block returns true. The result has nils and repeated elements removed.
  #
  # compare_method:: passed through to #stepwise.
  def comparing_collect(compare_method = nil) #:nodoc:
    accumulator = []
    stepwise(compare_method) do |element1, element2|
      accumulator << element1 if yield(element1, element2)
    end
    accumulator.compact.uniq
  end
end
@@ -0,0 +1,89 @@
1
+ # This is pulled in part from Ryan Davis' Sexp additions in Flay.
2
+
3
class Sexp
  # Performs the block on every Sexp nested in this sexp, visiting each
  # child before that child's own descendants.
  def deep_each(&block)
    each_sexp do |sexp|
      block[sexp]
      sexp.deep_each(&block)
    end
  end

  # Finds the last line of the Sexp if that information is available.
  #
  # Returns the largest line number found on any nested Sexp, falling back
  # to this node's own line; nil when no line information exists at all.
  def last_line_number
    last = nil
    deep_each do |sub_node|
      next unless sub_node.respond_to?(:line)
      current = sub_node.line
      # Nodes without line information must not wipe out a known line.
      last = current if current && (last.nil? || current > last)
    end
    last || (respond_to?(:line) ? line : nil)
  end

  # Maps all sub Sexps into a new Sexp: nodes that are Sexps are replaced
  # by the result of the block, every other node is copied over unchanged.
  def map_sexps
    inject(s()) do |sexps, sexp|
      sexps << (Sexp === sexp ? yield(sexp) : sexp)
    end
  end

  # Rejects all objects in the Sexp, at any depth, that return true for
  # the block. Returns a new Sexp; the receiver is left untouched.
  def deep_reject(&block)
    # The result is accumulated in a Sexp explicitly because Array#reject
    # returns a plain Array for Array subclasses on current Rubies.
    inject(s()) do |kept, node|
      unless block[node]
        kept << (Sexp === node ? node.deep_reject(&block) : node)
      end
      kept
    end
  end

  # Removes all literals from the Sexp (Symbols aren't excluded as they are used internally
  # by Sexp for node names which identifies structure important for comparison.)
  def remove_literals
    deep_reject do |node|
      !(node.is_a?(Symbol) || node.is_a?(Sexp))
    end
  end

  # Iterates through each direct child of the current Sexp that is itself
  # a Sexp.
  def each_sexp
    each do |sexp|
      next unless Sexp === sexp
      yield sexp
    end
  end

  # Performs the block on every Sexp nested in this sexp, looking for one
  # that returns true.
  def deep_any?(&block)
    any_sexp? do |sexp|
      block[sexp] || sexp.deep_any?(&block)
    end
  end

  # Iterates through each child Sexp of the current Sexp and looks for any Sexp
  # that returns true for the block.
  def any_sexp?
    any? do |sexp|
      next unless Sexp === sexp
      yield sexp
    end
  end

  # Determines if the passed in block node is contained within the Sexp
  # node (compared with <tt>==</tt> against every nested Sexp).
  def contains_block?(block_node)
    deep_any? do |sexp|
      sexp == block_node
    end
  end

  # First turns the Sexp into an Array then flattens it.
  def to_flat_ary
    to_a.flatten
  end
end
@@ -0,0 +1,121 @@
1
module Doppelganger
  # This handles the comparison of the Ruby nodes.
  #
  # This will use various iterators to compare all the different block-like
  # nodes in your code base and find similar or duplicate nodes.
  class NodeAnalysis

    # The extracted block records (objects responding to +body+, +node+,
    # +flat_body_array+, +filename+, +line+ and +last_line+).
    attr_accessor :sexp_blocks

    def initialize(sexp_blocks)
      @sexp_blocks = sexp_blocks
    end

    # Are there any duplicates in the code base.
    def duplication?
      !duplicates.empty?
    end

    # Finds blocks of code that are exact duplicates, node for node, once
    # literals are removed. All duplicate blocks are grouped together.
    #
    # Returns an array of groups (arrays of block records); groups appear in
    # the order their first member appears in +sexp_blocks+.
    def duplicates
      groups = [] # pairs of [literal-free body, members]
      @sexp_blocks.each do |sblock|
        normalized = sblock.body.remove_literals
        group = groups.find { |key, _members| key == normalized }
        if group
          group.last << sblock
        else
          groups << [normalized, [sblock]]
        end
      end
      groups.map { |_key, members| members }.select { |members| members.size > 1 }
    end

    # Finds block-like nodes that differ from another node by the threshold or less, but are not duplicates.
    #
    # threshold::    maximum number of Diff::LCS change sets allowed.
    # progress_bar:: optional object whose +inc+ is called once per pair.
    def diff(threshold, progress_bar = nil)
      similar_pairs(progress_bar) do |block_node_1, block_node_2|
        threshold >= Diff::LCS.diff(block_node_1.flat_body_array, block_node_2.flat_body_array).size
      end
    end

    # Finds block-like nodes that differ by a given threshold percentage or less, but are not duplicates.
    #
    # percentage::   maximum allowed difference, in percent.
    # progress_bar:: optional object whose +inc+ is called once per pair.
    def percent_diff(percentage, progress_bar = nil)
      # To calculate the percentage we can do this in one of two ways we can compare
      # total differences (the diff set flattened) over the total nodes (the flattened bodies added)
      # or we can compare the number of change sets (the size of the diff) over the average number of nodes
      # in the two methods.
      # Not sure which is best but I've gone with the former for now.
      similar_pairs(progress_bar) do |block_node_1, block_node_2|
        total_nodes = block_node_1.flat_body_array.size + block_node_2.flat_body_array.size
        diff_size = Diff::LCS.diff(block_node_1.flat_body_array, block_node_2.flat_body_array).flatten.size
        percentage >= (diff_size.to_f / total_nodes.to_f * 100)
      end
    end

    protected

    # Shared driver for #diff and #percent_diff: collects every candidate
    # pair for which the block returns true, then drops pairs that are
    # covered by an ancestor pair in the same result set.
    def similar_pairs(progress_bar)
      matches = []
      @compared_node_pairs = []
      stepwise_sblocks(progress_bar) do |block_node_1, block_node_2|
        matches << [block_node_1, block_node_2] if yield(block_node_1, block_node_2)
        @compared_node_pairs << [block_node_1, block_node_2]
      end
      @compared_node_pairs = []
      cleanup_descendant_duplicate_matches(matches)
    end

    # Yields every pair of blocks worth comparing: pairs already compared,
    # pairs whose literal-free bodies are equal (duplicates) and pairs where
    # one block contains the other are skipped.
    def stepwise_sblocks(progress_bar = nil)
      # Strip literals once per block instead of once per pair.
      normalized = @sexp_blocks.map { |sblock| [sblock, sblock.body.remove_literals] }
      normalized.each do |node1, body1|
        normalized.each do |node2, body2|
          progress_bar.inc unless progress_bar.nil?
          next if nodes_compared?(node1, node2)
          next if body1 == body2
          next if one_node_is_child_of_the_other?(node1, node2)
          yield node1, node2
        end
      end
    end

    # True when +block_node+ lies inside +element+: both come from the same
    # file and either the block starts within the element's line span or the
    # element's node contains the block's node.
    def node_includes_block?(element, block_node)
      return false unless element.filename == block_node.filename

      # Line information may be missing; only use the span when it is known.
      within_lines = element.line && element.last_line && block_node.line &&
                     (element.line..(element.last_line + 1)).include?(block_node.line)
      within_lines || element.node.contains_block?(block_node.node)
    end

    # Drops every pair that has an ancestor pair in the same results.
    def cleanup_descendant_duplicate_matches(diff_nodes)
      diff_nodes.reject do |block_node_pair|
        ancestor_pair_in_results?(block_node_pair, diff_nodes)
      end
    end

    # True when more than one pair in +results+ covers both members of
    # +pair+ (more than one, because +pair+ itself is among the results it
    # is checked against).
    def ancestor_pair_in_results?(pair, results)
      matches = results.select do |block_node_pair|
        block_node_pair.any? { |n| node_includes_block?(n, pair.first) } &&
          block_node_pair.any? { |n| node_includes_block?(n, pair.last) }
      end
      matches.size > 1
    end

    # For a MethodDef/BlockNode pair only "method contains block" is
    # checked; every other combination is checked in both directions.
    def one_node_is_child_of_the_other?(node1, node2)
      if node1.is_a?(MethodDef) && node2.is_a?(BlockNode)
        node_includes_block?(node1, node2)
      elsif node1.is_a?(BlockNode) && node2.is_a?(MethodDef)
        node_includes_block?(node2, node1)
      else
        node_includes_block?(node1, node2) || node_includes_block?(node2, node1)
      end
    end

    # True when a pair holding both nodes (in either order) has already
    # been compared during the current run.
    def nodes_compared?(node1, node2)
      @compared_node_pairs.any? do |block_node_pair|
        block_pair_nodes = block_node_pair.map(&:node)
        block_pair_nodes.include?(node1.node) && block_pair_nodes.include?(node2.node)
      end
    end
  end
end
@@ -0,0 +1,358 @@
1
+ # This is copied from ParseTree but with some of the "raise" statements
2
+ # removed. Also the rewrite_masgn method has been removed because of
3
+ # many places where masgn is used that doesn't meet the requirements
4
+ # of the first if/raise line.
5
+
6
+ $TESTING ||= false
7
+
8
# Sexp rewriters that normalise ("unify") the node shapes produced by the
# parser. Meant to be mixed into a SexpProcessor subclass: the rewrite_*
# methods use +s+, +context+ and +super+ from the including class.
module UnifiedRuby
  # Converts a plain Array into a Sexp before handing it to the including
  # class's #process.
  def process exp
    exp = Sexp.from_array exp unless Sexp === exp or exp.nil?
    super
  end

  # s(:argscat, ary, val) => ary with s(:splat, val) appended (+ary+ is
  # wrapped in an :array first when it is not one already).
  def rewrite_argscat exp
    _, ary, val = exp
    ary = s(:array, ary) unless ary.first == :array
    ary << s(:splat, val)
  end

  # Renames :argspush to :arglist in place.
  def rewrite_argspush exp
    exp[0] = :arglist
    exp
  end

  # Makes sure an :attrasgn ends in an :arglist: a trailing :array is
  # renamed, a missing argument list is appended as an empty s(:arglist).
  def rewrite_attrasgn(exp)
    last = exp.last

    if Sexp === last then
      last[0] = :arglist if last[0] == :array
    else
      exp << s(:arglist)
    end

    exp
  end

  # Unwraps a :begin that holds a single expression; larger ones are
  # returned untouched.
  def rewrite_begin(exp)
    if exp.size > 2
      exp
    else
      exp.last
    end
  end

  # For a three element s(:block_pass, block, recv), moves the block pass
  # into the receiver when that is a :super or :call node and returns the
  # receiver instead. Anything else is returned unchanged.
  def rewrite_block_pass exp
    if exp.size == 3 then
      _, block, recv = exp
      case recv.first
      when :super then
        recv << s(:block_pass, block)
        exp = recv
      when :call then
        recv.last << s(:block_pass, block)
        exp = recv
      else
        exp
      end
    end

    exp
  end

  # s(:bmethod, args, body) => s(:scope, s(:block, s(:args, ...), ...)).
  #
  # Raises RuntimeError for an argument node that is neither :lasgn nor
  # :splat.
  def rewrite_bmethod(exp)
    _, args, body = exp

    args ||= s(:array)
    body ||= s(:block)

    args = s(:args, args) unless args[0] == :array

    args = args[1] if args[1] && args[1][0] == :masgn # TODO: clean up
    args = args[1] if args[1] && args[1][0] == :array
    args[0] = :args

    # this is ugly because rewriters are depth first.
    # TODO: maybe we could come up with some way to do both forms of rewriting.
    args.map! { |s|
      if Sexp === s
        case s[0]
        when :lasgn then
          s[1]
        when :splat then
          :"*#{s[1][1]}"
        else
          raise "huh?: #{s.inspect}"
        end
      else
        s
      end
    }

    body = s(:block, body) unless body[0] == :block
    body.insert 1, args

    s(:scope, body)
  end

  # Normalises the last element of a :call into an :arglist:
  # * a trailing nil is dropped and an empty s(:arglist) appended,
  # * :array / :arglist is renamed to :arglist in place,
  # * :argscat / :splat is wrapped in a new s(:arglist, ...),
  # * a non-Array last element gets an empty s(:arglist) appended after it.
  #
  # Raises RuntimeError for any other Array type in last position.
  def rewrite_call(exp)
    args = exp.last
    case args
    when nil
      exp.pop
    when Array
      case args.first
      when :array, :arglist then
        args[0] = :arglist
      when :argscat, :splat then
        exp[-1] = s(:arglist, args)
      else
        raise "unknown type in call #{args.first.inspect} in #{exp.inspect}"
      end
      return exp
    end

    exp << s(:arglist)

    exp
  end

  # Renames :dasgn (and, through the alias, :dasgn_curr) to :lasgn in place.
  def rewrite_dasgn(exp)
    exp[0] = :lasgn
    exp
  end

  alias :rewrite_dasgn_curr :rewrite_dasgn

  ##
  # :defn is one of the most complex of all the ASTs in ruby. We do
  # one of 3 different translations:
  #
  # 1) From:
  #
  #   s(:defn, :name, s(:scope, s(:block, s(:args, ...), ...)))
  #   s(:defn, :name, s(:bmethod, s(:masgn, s(:dasgn_curr, :args)), s(:block, ...)))
  #   s(:defn, :name, s(:fbody, s(:bmethod, s(:masgn, s(:dasgn_curr, :splat)), s(:block, ...))))
  #
  # to:
  #
  #   s(:defn, :name, s(:args, ...), s(:scope, s(:block, ...)))
  #
  # 2) From:
  #
  #   s(:defn, :writer=, s(:attrset, :@name))
  #
  # to:
  #
  #   s(:defn, :writer=, s(:args), s(:attrset, :@name))
  #
  # 3) From:
  #
  #   s(:defn, :reader, s(:ivar, :@name))
  #
  # to:
  #
  #   s(:defn, :reader, s(:args), s(:ivar, :@name))
  #
  # Raises RuntimeError when the node was detected as one of the special
  # ("weirdo") forms but matches none of them when patched up.

  def rewrite_defn(exp)
    weirdo = exp.ivar || exp.attrset
    fbody  = exp.fbody(true)

    weirdo ||= fbody.cfunc if fbody

    exp.push(fbody.scope) if fbody unless weirdo

    args = exp.scope.block.args(true) unless weirdo
    exp.insert 2, args if args

    # move block_arg up and in
    block_arg = exp.scope.block.block_arg(true) rescue nil
    if block_arg
      block = args.block(true)
      args << :"&#{block_arg.last}"
      args << block if block
    end

    # patch up attr_accessor methods
    if weirdo then
      case
      when fbody && fbody.cfunc then
        exp.insert 2, s(:args, :"*args")
      when exp.ivar then
        exp.insert 2, s(:args)
      when exp.attrset then
        exp.insert 2, s(:args, :arg)
      else
        raise "unknown wierdo: #{weirdo.inspect}"
      end
    end

    exp
  end

  # Rewrites a :defs by removing the receiver, applying #rewrite_defn to
  # what is left and putting the receiver back at index 1.
  def rewrite_defs(exp)
    receiver = exp.delete_at 1

    # TODO: I think this would be better as rewrite_scope, but that breaks others
    exp = s(exp.shift, exp.shift,
            s(:scope,
              s(:block, exp.scope.args))) if exp.scope && exp.scope.args

    result = rewrite_defn(exp)
    result.insert 1, receiver

    result
  end

  # Replaces a :dmethod by its third element (the scope / block / body).
  def rewrite_dmethod(exp)
    exp.shift # type
    exp.shift # dmethod name
    exp.shift # scope / block / body
  end

  # Renames :dvar to :lvar in place.
  def rewrite_dvar(exp)
    exp[0] = :lvar
    exp
  end

  # Turns an :fcall into a :call with a nil receiver, then normalises it
  # with #rewrite_call.
  def rewrite_fcall(exp)
    exp[0] = :call
    exp.insert 1, nil

    rewrite_call(exp)
  end

  # Renames the index list (element 2) of an :op_asgn1 to :arglist.
  def rewrite_op_asgn1(exp)
    exp[2][0] = :arglist # if exp[2][0] == :array
    exp
  end

  # Normalises a :resbody: guarantees an exception list at index 1, moves
  # an assignment from s(:gvar, :$!) (the "=> e" part) into that list and
  # guarantees a body slot (nil when there is no body).
  def rewrite_resbody(exp)
    exp[1] ||= s(:array)        # no args

    body = exp[2]
    if body then
      case body.first
      when :lasgn, :iasgn then
        exp[1] << exp.delete_at(2) if body[-1] == s(:gvar, :$!)
      when :block then
        exp[1] << body.delete_at(1) if [:lasgn, :iasgn].include?(body[1][0]) &&
          body[1][-1] == s(:gvar, :$!)
      end
    end

    exp << nil if exp.size == 2 # no body

    exp
  end

  # Flattens a :rescue so that chained :resbody nodes become siblings:
  # s(:rescue, body, resbody..., else). Missing parts are dropped by the
  # final +compact+.
  def rewrite_rescue(exp)
    # SKETCHY HACK return exp if exp.size > 4
    ignored = exp.shift
    body    = exp.shift unless exp.first.first == :resbody
    resbody = exp.shift
    els     = exp.shift unless exp.first.first == :resbody unless exp.empty?
    rest    = exp.empty? ? nil : exp # graceful re-rewriting (see rewrite_begin)

    resbodies = []

    unless rest then
      # Walk the chain of nested resbodies, pulling each one out.
      while resbody do
        resbodies << resbody
        resbody = resbody.resbody(true)
      end

      resbodies.each do |resbody|
        # Unwrap a :block that holds a single statement.
        if resbody[2] && resbody[2][0] == :block && resbody[2].size == 2 then
          resbody[2] = resbody[2][-1]
        end
      end
    else
      resbodies = [resbody] + rest
    end

    resbodies << els if els

    s(:rescue, body, *resbodies).compact
  end

  # Wraps a :splat in s(:array, ...) unless the first entry of +context+
  # is one of the node types listed below.
  def rewrite_splat(exp)
    good = [:arglist, :argspush, :array, :svalue, :yield, :super].include? context.first
    exp = s(:array, exp) unless good
    exp
  end

  # Inlines the elements of a lone :array argument into the :super node.
  # A super whose structure starts with [:super, :array, :splat] is
  # returned unchanged.
  def rewrite_super(exp)
    return exp if exp.structure.flatten.first(3) == [:super, :array, :splat]
    exp.push(*exp.pop[1..-1]) if exp.size == 2 && exp.last.first == :array
    exp
  end

  # Turns a :vcall into a :call by appending a nil argument slot and
  # delegating to #rewrite_fcall.
  def rewrite_vcall(exp)
    exp.push nil
    rewrite_fcall(exp)
  end

  # Normalises :yield arguments. A third element is popped off and treated
  # as a flag: when set, the single argument is wrapped in an :array if it
  # is not one already; otherwise the elements of an :array argument are
  # inlined into the :yield node.
  def rewrite_yield(exp)
    real_array = exp.pop if exp.size == 3

    if exp.size == 2 then
      if real_array then
        exp[-1] = s(:array, exp[-1]) if exp[-1][0] != :array
      else
        exp.push(*exp.pop[1..-1]) if exp.last.first == :array
      end
    end

    exp
  end

  # Renames :zarray to :array in place.
  def rewrite_zarray(exp)
    exp[0] = :array
    exp
  end
end
316
+
317
# First unification stage: gives :call and :fcall nodes a trailing
# :arglist before the main rewriters run.
class PreUnifier < SexpProcessor
  def initialize
    super
    @unsupported.delete(:newline)
  end

  # Appends an empty :arglist to a :call with fewer than four elements and
  # renames a trailing :array to :arglist.
  def rewrite_call(exp)
    exp.push(s(:arglist)) if exp.size < 4
    trailing = exp.last
    trailing[0] = :arglist if trailing.first == :array
    exp
  end

  # Appends an empty :arglist to an :fcall with fewer than three elements.
  # A trailing :array is renamed to :arglist, or wrapped in a new :arglist
  # when it contains a :splat.
  def rewrite_fcall(exp)
    exp.push(s(:arglist)) if exp.size < 3
    args = exp[-1]
    return exp unless args[0] == :array

    splatted = args.any? { |arg| Array === arg && arg.first == :splat }
    exp[-1] = s(:arglist, args) if splatted
    exp[-1][0] = :arglist
    exp
  end
end
339
+
340
# Second unification stage: a SexpProcessor that mixes in the UnifiedRuby
# rewriters and removes :newline from its unsupported node list.
class PostUnifier < SexpProcessor
  include UnifiedRuby

  def initialize
    super
    @unsupported.delete(:newline)
  end
end
348
+
349
##
# Quick and easy SexpProcessor that unifies the sexp structure by chaining
# a PreUnifier and then a PostUnifier.

class Unifier < CompositeSexpProcessor
  def initialize
    super
    [PreUnifier, PostUnifier].each { |stage| self << stage.new }
  end
end
358
+ end