ruby_codex 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/data_node.rb ADDED
@@ -0,0 +1,91 @@
1
+ require 'parser/current'
2
+ require 'unparser'
3
+ require 'ast'
4
+ require 'mongoid'
5
+ load 'normalize_ast.rb'
6
+
7
# Collects data points about AST nodes matching a predicate, groups them
# by computed keys, aggregates each group, and persists the aggregates
# through a Mongoid model class.
class DataNode

  # db         - Mongoid model class for raw per-node documents (currently unused by save!).
  # agg_db     - Mongoid model class for aggregated documents.
  # type       - predicate Proc; type.call(ast) decides whether a node is tracked.
  # key_procs  - {name => Proc.new { |ast, file, project| ... }} computing grouping keys.
  # data_procs - {name => Proc.new { |ast, file, project| ... }} computing per-node data.
  # combine    - {name => Proc.new { |data_points| ... }} collapsing a group into one value.
  # query      - optional Proc.new { |agg_db, keys, data| ... } flagging unlikely nodes.
  def initialize(db, agg_db, type, key_procs = {}, data_procs = {}, combine = {}, query = nil)
    @type = type
    @data = Hash.new { |h, k| h[k] = [] }
    @combine = combine
    @processed = {}
    @key_procs = key_procs
    @data_procs = data_procs
    @db, @agg_db = db, agg_db
    @query = query
  end

  # If +ast+ matches the predicate, computes its key hash and data hash and
  # yields both; returns the block's value. Non-matching nodes return nil.
  def process_node(ast, file, project)
    return unless @type.call(ast)
    keys = @key_procs.each_with_object({}) do |(name, proc), h|
      h[name] = proc.call(ast, file, project)
    end
    data_point = @data_procs.each_with_object({}) do |(name, proc), h|
      h[name] = proc.call(ast, file, project)
    end
    yield keys, data_point if block_given?
  end

  # Records the node's data point under its key group. Yields the keys and
  # the group's accumulated data points when a block is given. Returns the
  # full key => data-points hash.
  def add_ast(ast, file, project)
    process_node(ast, file, project) do |keys, data_point|
      @data[keys].push(data_point)
      yield keys, @data[keys] if block_given?
    end
    @data
  end

  # Collapses every key group with the combine procs and merges the keys
  # into the result, producing one document per group.
  def process_all_ast
    @processed = {}
    @data.each do |keys, points|
      @processed[keys] = collapse_data(points).merge(keys)
    end
    @processed
  end

  # Runs the anomaly-query proc against a single node, returning whatever
  # the proc flags (or nil when nothing is flagged / no query configured).
  def query(ast, file = "N/A", project = "N/A")
    return unless @query
    process_node(ast, file, project) do |keys, data|
      @query.call(@agg_db, keys, data) || nil
    end
  end

  # Aggregates everything collected so far and persists one document per
  # key group to the aggregate collection. Returns true.
  # NOTE: no longer prints a running count — that was debug output.
  def save!
    process_all_ast
    unless @key_procs.empty?
      @processed.each_value do |doc|
        @agg_db.new(doc).save
      end
    end
    true
  end

  # Applies each combine proc to the group's raw data points.
  def collapse_data(data)
    @combine.each_with_object({}) do |(name, proc), combined|
      combined[name] = proc.call(data)
    end
  end

end
91
+
@@ -0,0 +1,107 @@
1
+ require 'parser/current'
2
+ require 'unparser'
3
+ require 'ast'
4
+ require 'pp'
5
+
6
# Hands out stable, anonymized placeholder names for identifiers and
# literals, one counter per category, so that two structurally identical
# ASTs normalize to the same names. The same table backs :var and :arg,
# so locals and argument names share one counter.
class NameTracker
  def initialize
    vars = counter_table { |n| "var#{n}" }
    @mapping = {
      :str      => counter_table { |n| "str#{n}" },
      :sym      => counter_table { |n| "sym#{n}".to_sym },
      :arg      => vars,
      :float    => counter_table { |n| n.to_f },
      :int      => counter_table { |n| n },
      :var      => vars,
      :restarg  => counter_table { |n| "*vr#{n}" },
      :blockarg => counter_table { |n| "&vr#{n}" }
    }
  end

  # Returns the placeholder for +id+ in category +type+, minting a new one
  # (based on how many the category has seen) on first encounter.
  def rename(type, id)
    @mapping[type][id]
  end

  private

  # Memoizing hash whose default block derives each new value from the
  # number of entries already assigned.
  def counter_table
    Hash.new { |h, k| h[k] = yield(h.size) }
  end
end
31
+
32
# Rewrites an AST so identifiers and literals are replaced with canonical
# placeholder names (via NameTracker), letting structurally identical code
# compare equal. Also tallies how many values of each kind were seen.
class ASTNormalizer
  attr_accessor :complexity

  def initialize
    @track = NameTracker.new
    @complexity = Hash.new { |h, k| h[k] = [] }
  end

  # Records one original value under the given complexity bucket.
  def update_complexity(type, val)
    @complexity[type].push(val)
  end

  # Sizes of the interesting buckets only (ints, strings, sends, vars,
  # floats, symbols).
  def pretty_complexity
    measures = [:int, :str, :send, :var, :float, :sym]
    @complexity.each_with_object({}) do |(kind, vals), out|
      out[kind] = vals.size if measures.include?(kind)
    end
  end

  # Returns a copy of +ast+ with names/literals anonymized; non-node
  # values pass through unchanged.
  def rewrite_ast(ast)
    return ast unless ast.is_a?(AST::Node)

    kids = ast.children
    case ast.type
    # Variable reads: every child is an identifier to anonymize.
    when :lvar, :ivar, :gvar
      update_complexity(:var, kids.first)
      ast.updated(nil, kids.map { |c| @track.rename(:var, c) })
    # Assignments: first child is the target name, the rest are values.
    when :lvasgn, :gvasgn, :ivasgn, :cvasgn
      update_complexity(:assignment, kids.first)
      ast.updated(nil, kids.each_with_index.map do |c, i|
        i.zero? ? @track.rename(:var, c) : rewrite_ast(c)
      end)
    # Primitives and simple arguments: rename each literal/identifier child.
    when :int, :float, :str, :sym, :arg, :restarg, :blockarg
      update_complexity(ast.type, kids.first)
      ast.updated(nil, kids.map { |c| @track.rename(ast.type, c) })
    # Optional argument: a name followed by a default-value expression.
    when :optarg
      update_complexity(:arg, kids.first)
      ast.updated(nil, kids.each_with_index.map do |c, i|
        i.zero? ? @track.rename(:var, c) : rewrite_ast(c)
      end)
    # Instance method definition: the method name itself is erased.
    when :def
      update_complexity(:def, kids.first)
      ast.updated(nil, kids.each_with_index.map do |c, i|
        i.zero? ? :method : rewrite_ast(c)
      end)
    # Singleton method definition: the name sits at index 1, after the receiver.
    when :defs
      update_complexity(:def, kids.first)
      ast.updated(nil, kids.each_with_index.map do |c, i|
        i == 1 ? :method : rewrite_ast(c)
      end)
    # Method call: count it, then recurse into receiver and arguments.
    when :send
      update_complexity(:send, kids[1])
      ast.updated(nil, kids.map { |c| rewrite_ast(c) })
    else
      ast.updated(nil, kids.map { |c| rewrite_ast(c) })
    end
  end
end
95
+
96
# Depth-first traversal helper for parser ASTs.
class ASTProcess
  # Walks +ast+ pre-order, yielding each Parser::AST::Node together with
  # its type. Non-node children (symbols, literals, nil) are skipped.
  def store_nodes(ast, &block)
    return unless ast.is_a?(Parser::AST::Node)
    yield ast, ast.type
    ast.children.each { |child| store_nodes(child, &block) }
  end
end
107
+
data/lib/ruby_codex.rb ADDED
@@ -0,0 +1,222 @@
1
+ load 'data_node.rb'
2
+
3
# Wires together the DataNode collectors that Codex uses to mine Ruby
# ASTs: block usage (@block), function calls (@func), call chains
# (@func_chain), conditionals (@cond) and identifier assignments (@ident).
class Codex

  attr_accessor :block, :func, :func_chain, :cond, :ident

  # db / agg_db are Mongoid model classes passed straight to each DataNode.
  def initialize(db, agg_db)

    # --- helper procs shared by the DataNode definitions below ---

    # Total normalized-complexity score of a node.
    info = Proc.new do |node|
      normal_node(node) do |n, norm|
        norm.pretty_complexity.map { |k, v| v }.reduce(:+)
      end
    end

    # Number of :send nodes in the normalized subtree.
    func_info = Proc.new do |node|
      normal_node(node) do |n, norm|
        norm.pretty_complexity[:send] || 0
      end
    end

    # AST node type (or Ruby class name for non-AST values) as a String.
    type = Proc.new { |node| node.is_a?(AST::Node) ? node.type.to_s : node.class.to_s }

    # "Const.method" for constant receivers, otherwise just "method".
    func_name = Proc.new do |node|
      if type.call(node.children.first) == "const"
        node.children.first.children[1].to_s + "." + node.children[1].to_s
      else
        node.children[1].to_s
      end
    end

    key = {
      :type => type
    }

    data_core = {
      :file => Proc.new { |node, f, p| f },
      :project => Proc.new { |node, f, p| p },
      :line => Proc.new { |node, f, p| node.loc ? node.loc.line : nil },
      :info => info,
      :func_info => func_info,
      # Unparsing can fail on exotic nodes; record nil rather than abort.
      :orig_code => Proc.new { |node, f, p| Unparser.unparse(node) rescue nil }
    }

    combine = {
      :files => Proc.new { |v| v.map { |x| x[:file] }.uniq },
      :file_count => Proc.new { |v| v.map { |x| x[:file] }.uniq.count },
      :projects => Proc.new { |v| v.map { |x| x[:project] }.uniq },
      :project_count => Proc.new { |v| v.map { |x| x[:project] }.uniq.count },
      # Keep at most 10 sample snippets per group.
      :orig_code => Proc.new { |v| v.sample(10).map do |x|
        { :code => x[:orig_code], :file => x[:file], :line => x[:line] }
      end.uniq },
      :count => Proc.new { |v| v.map { |x| x[:orig_code] }.count },
      :info => Proc.new { |v| v.first[:info] }, # some process may overwrite
      :func_info => Proc.new { |v| v.first[:func_info] }
    }

    @block = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :block },
      key.merge({
        :func => Proc.new { |x| func_name.call(x.children.first) },
        :body => Proc.new { |x| normalize_nodes(x.children.last) },
        :arg_size => Proc.new { |x| x.children[1].children.size },
        # The block's "return value": last expression of its body.
        :ret_val => Proc.new do |x|
          body = x.children.last
          ret = type.call(body) == "begin" ? body.children.last : body
          typ = type.call(ret)
          typ == "send" ? func_name.call(ret) : typ
        end,
        # Normalized source with the call receiver stripped out.
        :norm_code => Proc.new { |x|
          normal_node(x) do |n|
            Unparser.unparse(n.updated(nil, n.children.map.with_index do |y, i|
              i == 0 ? without_caller(y) : y
            end)) rescue nil
          end
        }
      }),
      data_core.merge({
        :args => Proc.new { |x| x.children[1].children.map { |y| y.children[0].to_s } }
      }),
      combine.merge({
        :args_list => Proc.new { |v| v.map { |x| x[:args] } }
      }),
      # Flag block/return-type pairings never seen, when both the block's
      # function and the return type are individually common.
      Proc.new { |db, keys, vals|
        query = db.where(:type => keys[:type], :func => keys[:func], :ret_val => keys[:ret_val]).first
        query_count = query.nil? ? 0 : query.count
        blocks = db.where(:type => keys[:type], :func => keys[:func]).sum(:count)
        rets = db.where(:type => keys[:type], :ret_val => keys[:ret_val]).sum(:count)
        if query_count <= 0
          { :keys => keys,
            :message =>
              "We've seen #{keys[:func]} blocks returning #{keys[:ret_val]} only #{query_count.to_s} " +
              "times, though we've seen #{keys[:func]} blocks #{blocks.to_s} times and #{keys[:ret_val]} " +
              "returned #{rets.to_s} times."
          } if blocks > 0 && rets > 0
        end
      }
    )

    @func = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :send },
      key.merge({
        :func => Proc.new { |x| func_name.call(x) },
        :norm_code => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(without_caller(n)) rescue nil } },
        :sig => Proc.new { |x| x.children.drop(2).map { |y| type.call(y) } }
      }),
      data_core,
      combine,
      # Flag call forms that are rare relative to the most common form
      # of the same function.
      Proc.new { |db, keys, values|
        query = db.where(keys).first
        query_count = query.nil? ? 0 : query.count
        func = db.where(:type => keys[:type], :func => keys[:func]).sort(:count => -1).limit(1).first
        alt_count = func.nil? ? 0 : func.count
        { :keys => keys,
          :message =>
            "Function call #{keys[:norm_code]} has appeared #{query_count.to_s} times, but " +
            "#{func.norm_code} has appeared #{alt_count.to_s} times."
        } if alt_count > 10 * (query_count + 1)
      }
    )

    # NOTE: was `@func_chain = func = DataNode.new(...)`; the extra local
    # was never read, so it has been dropped.
    @func_chain = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :send && type.call(x.children.first) == "send" },
      key.merge({
        :type => Proc.new { "func_chain" },
        :f1 => Proc.new { |x| func_name.call(x) },
        :f2 => Proc.new { |x| func_name.call(x.children.first) }
      }),
      data_core.merge({
        :info => Proc.new { 0 },
        :func_info => Proc.new { 0 }
      }),
      combine,
      # Flag pairs of functions that are each common but never chained.
      Proc.new do |db, keys, data|
        query = db.where(keys).first
        if query.nil? || query.count <= 0
          fs = [:f1, :f2].map { |f| db.where(:type => "send", :func => keys[f]).size }
          { :keys => keys,
            :message =>
              "Function #{keys[:f1]} has appeared #{fs[0].to_s} times " +
              "and #{keys[:f2]} has appeared #{fs[1].to_s} times, but " +
              "they haven't appeared together."
          } unless fs[0] <= 0 || fs[1] <= 0
        end
      end
    )

    @cond = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :if },
      key.merge({
        :norm_code => Proc.new { |x| normalize_nodes(x) },
        :cond => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children.first) } },
        :iftrue => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children[1]) } },
        :iffalse => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children[2]) } }
      }),
      data_core,
      combine
    )

    @ident = DataNode.new(
      db, agg_db,
      Proc.new { |x| [:lvasgn, :ivasgn, :cvasgn, :gvasgn].include?(x.type) },
      key.merge({
        :type => Proc.new { "ident" },
        :ident => Proc.new { |x| x.children.first.to_s }
      }),
      data_core.merge({
        :ident_type => Proc.new { |x| type.call(x.children[1]) rescue nil },
        :info => Proc.new { 0 },
        :func_info => Proc.new { 0 }
      }),
      combine.merge({
        :ident_types => Proc.new { |v| v.group_by { |y| y[:ident_type] }.map_hash { |x| x.size } }
      }),
      # Flag identifiers assigned a type that disagrees with their history.
      # NOTE: removed a stray debug `pp types`, and guarded against a crash
      # when fewer than two known types exist (second was nil).
      Proc.new { |db, keys, data|
        query = db.where(keys).first
        if query
          types = query.ident_types.select { |k, v| ["str", "int", "float", "array", "hash"].include? k }
          types.default = 0
          first, second = types.sort_by { |k, v| v * -1 }.take(2)
          second ||= [nil, 0] # only one known type: compare against zero
          if first && first[1] > 10 * (second[1] + 1)
            { :keys => keys,
              :message =>
                "The identifier #{keys[:ident]} has appeared #{first[1].to_s} " +
                "times as #{first[0].to_s}, but only #{types[data[:ident_type]].to_s} " +
                "times as #{data[:ident_type].to_s}"
            } unless first[0].to_s == data[:ident_type]
          end
        end
      }
    )
  end

  # Node with its receiver (child 0) nil'd out, so calls compare
  # independently of what they were called on.
  def without_caller(node)
    node.updated(nil, node.children.map.with_index do |x, i|
      i == 0 ? nil : x
    end)
  end

  # Yields the normalized node together with the ASTNormalizer that
  # produced it; returns the block's value.
  def normal_node(node)
    norm = ASTNormalizer.new
    yield norm.rewrite_ast(node), norm
  end

  # Normalized source text for a node (nil when it cannot be unparsed).
  def normalize_nodes(nodes)
    normal_node(nodes) do |n|
      Unparser.unparse(n) rescue nil
    end
  end

end
213
+
214
# NOTE(review): monkey-patches core Hash. Codex relies on #map_hash for
# the :ident_types aggregator, so the method must stay; a refinement
# would be the safer modern alternative.
class Hash
  # Returns a new hash with the same keys, each value transformed by the
  # given block.
  def map_hash
    each_with_object({}) do |(key, val), mapped|
      mapped[key] = yield(val)
    end
  end
end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby_codex
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ethan Fast
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-07-31 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A DSL for collecting and aggregating data on Ruby ASTs
15
+ email: ejhfast@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/normalize_ast.rb
21
+ - lib/data_node.rb
22
+ - lib/ruby_codex.rb
23
+ homepage: http://rubygems.org/gems/ruby-codex
24
+ licenses:
25
+ - MIT
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ! '>='
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project:
44
+ rubygems_version: 1.8.25
45
+ signing_key:
46
+ specification_version: 3
47
+ summary: Analyze ruby ASTs with Codex
48
+ test_files: []