ruby-codex 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/lib/codex.rb +222 -0
  2. data/lib/data_node.rb +91 -0
  3. data/lib/normalize_ast.rb +107 -0
  4. metadata +48 -0
@@ -0,0 +1,222 @@
1
+ load 'data_node.rb'
2
+
3
# Builds the set of DataNode collectors used to gather statistics about Ruby
# AST nodes (blocks, method calls, call chains, conditionals and identifier
# assignments) and to query previously aggregated statistics.
#
# Every collector is a DataNode constructed with, in order:
#   * the raw and aggregate stores (+db+, +agg_db+),
#   * a predicate proc selecting the AST nodes it handles,
#   * "key" procs whose results identify a group of equivalent nodes,
#   * "data" procs producing one data point per matching node,
#   * "combine" procs collapsing all data points of one group,
#   * optionally a query proc returning a +{:keys, :message}+ hash when a node
#     looks unusual compared to the aggregate store, or nil otherwise.
class Codex

  attr_accessor :block, :func, :func_chain, :cond, :ident

  # db     - store handed to every DataNode as its raw store.
  # agg_db - store handed to every DataNode as its aggregate store; the query
  #          procs below call +where+ on whatever DataNode passes them.
  def initialize(db, agg_db)

    # --- helper procs -------------------------------------------------------

    # Total number of measured elements in the normalized node. Starts the
    # fold at 0 so a node without any measured element yields 0 rather than
    # nil, matching func_info below.
    info = Proc.new do |node|
      normal_node(node) do |_n, norm|
        norm.pretty_complexity.values.reduce(0, :+)
      end
    end

    # Number of :send entries recorded while normalizing the node.
    func_info = Proc.new do |node|
      normal_node(node) do |_n, norm|
        norm.pretty_complexity[:send] || 0
      end
    end

    # AST node type as a string; for non-node values, their class name.
    type = Proc.new { |node| node.is_a?(AST::Node) ? node.type.to_s : node.class.to_s }

    # Name of a :send node, prefixed with "Const." when the receiver is a
    # constant node.
    func_name = Proc.new do |node|
      if type.call(node.children.first) == "const"
        node.children.first.children[1].to_s + "." + node.children[1].to_s
      else
        node.children[1].to_s
      end
    end

    # --- shared proc tables -------------------------------------------------

    key = {
      :type => type
    }

    data_core = {
      :file => Proc.new { |node, f, p| f },
      :project => Proc.new { |node, f, p| p },
      :line => Proc.new { |node, f, p| node.loc ? node.loc.line : nil },
      :info => info,
      :func_info => func_info,
      # Best effort: nodes that cannot be unparsed are stored without code.
      :orig_code => Proc.new { |node, f, p| Unparser.unparse(node) rescue nil },
    }

    combine = {
      :files => Proc.new { |v| v.map { |x| x[:file] }.uniq },
      :file_count => Proc.new { |v| v.map { |x| x[:file] }.uniq.count },
      :projects => Proc.new { |v| v.map { |x| x[:project] }.uniq },
      :project_count => Proc.new { |v| v.map { |x| x[:project] }.uniq.count },
      # Keeps a random sample of at most ten occurrences.
      :orig_code => Proc.new { |v| v.sample(10).map do |x|
        {:code => x[:orig_code], :file => x[:file], :line => x[:line]}
      end.uniq },
      :count => Proc.new { |v| v.map { |x| x[:orig_code] }.count },
      :info => Proc.new { |v| v.first[:info] }, # some process may overwrite
      :func_info => Proc.new { |v| v.first[:func_info] }
    }

    # --- blocks -------------------------------------------------------------

    @block = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :block },
      key.merge({
        :func => Proc.new { |x| func_name.call(x.children.first) },
        :body => Proc.new { |x| normalize_nodes(x.children.last) },
        :arg_size => Proc.new { |x| x.children[1].children.size },
        # Type of the last expression of the block body; for a method call,
        # the called method's name.
        :ret_val => Proc.new do |x|
          body = x.children.last
          ret = type.call(body) == "begin" ? body.children.last : body
          typ = type.call(ret)
          typ == "send" ? func_name.call(ret) : typ
        end,
        # Normalized block source with the receiver of the call removed.
        :norm_code => Proc.new { |x|
          normal_node(x) do |normalized|
            Unparser.unparse(normalized.updated(nil, normalized.children.map.with_index do |y, i|
              i == 0 ? without_caller(y) : y
            end)) rescue nil
          end }
      }),
      data_core.merge({
        :args => Proc.new { |x| x.children[1].children.map { |y| y.children[0].to_s } }
      }),
      combine.merge({
        :args_list => Proc.new { |v| v.map { |x| x[:args] } }
      }),
      # Reports a block/return-value pairing that was never seen together
      # although both the block's method and the return value were seen.
      Proc.new { |db, keys, vals|
        query = db.where(:type => keys[:type], :func => keys[:func], :ret_val => keys[:ret_val]).first
        query_count = query.nil? ? 0 : query.count
        blocks = db.where(:type => keys[:type], :func => keys[:func]).sum(:count)
        rets = db.where(:type => keys[:type], :ret_val => keys[:ret_val]).sum(:count)
        if query_count <= 0
          { :keys => keys,
            :message =>
              "We've seen #{keys[:func]} blocks returning #{keys[:ret_val]} only #{query_count.to_s} " +
              "times, though we've seen #{keys[:func]} blocks #{blocks.to_s} times and #{keys[:ret_val]} " +
              "returned #{rets.to_s} times."
          } if blocks > 0 && rets > 0
        end
      }
    )

    # --- method calls -------------------------------------------------------

    @func = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :send },
      key.merge({
        :func => Proc.new { |x| func_name.call(x) },
        :norm_code => Proc.new { |x| normal_node(x) { |normalized| Unparser.unparse(without_caller(normalized)) rescue nil } },
        :sig => Proc.new { |x| x.children.drop(2).map { |y| type.call(y) } }
      }),
      data_core,
      combine,
      # Reports a call shape that is more than ten times rarer than the most
      # frequent recorded shape of the same method.
      Proc.new { |db, keys, values|
        query = db.where(keys).first
        query_count = query.nil? ? 0 : query.count
        func = db.where(:type => keys[:type], :func => keys[:func]).sort(:count => -1).limit(1).first
        alt_count = func.nil? ? 0 : func.count
        # When func is nil alt_count is 0, so the hash (which reads
        # func.norm_code) is never built in that case.
        { :keys => keys,
          :message =>
            "Function call #{keys[:norm_code]} has appeared #{query_count.to_s} times, but " +
            "#{func.norm_code} has appeared #{alt_count.to_s} times."
        } if alt_count > 10 * (query_count + 1)
      }
    )

    # --- chained method calls ----------------------------------------------

    @func_chain = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :send && type.call(x.children.first) == "send" },
      key.merge({
        :type => Proc.new { "func_chain" },
        :f1 => Proc.new { |x| func_name.call(x) },
        :f2 => Proc.new { |x| func_name.call(x.children.first) }
      }),
      data_core.merge({
        :info => Proc.new { 0 },
        :func_info => Proc.new { 0 }
      }),
      combine,
      # Reports two methods that were each seen on their own but never chained.
      Proc.new do |db, keys, data|
        query = db.where(keys).first
        if query.nil? || query.count <= 0
          fs = [:f1, :f2].map { |f| db.where(:type => "send", :func => keys[f]).size }
          { :keys => keys,
            :message =>
              "Function #{keys[:f1]} has appeared #{fs[0].to_s} times " +
              "and #{keys[:f2]} has appeared #{fs[1].to_s} times, but " +
              "they haven't appeared together."
          } unless fs[0] <= 0 || fs[1] <= 0
        end
      end
    )

    # --- conditionals (no query proc) --------------------------------------

    @cond = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :if },
      key.merge({
        :norm_code => Proc.new { |x| normalize_nodes(x) },
        :cond => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children.first) } },
        :iftrue => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children[1]) } },
        :iffalse => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children[2]) } },
      }),
      data_core,
      combine
    )

    # --- identifier assignments --------------------------------------------

    @ident = DataNode.new(
      db, agg_db,
      Proc.new { |x| [:lvasgn, :ivasgn, :cvasgn, :gvasgn].include?(x.type) },
      key.merge({
        :type => Proc.new { "ident" },
        :ident => Proc.new { |x| x.children.first.to_s },
      }),
      data_core.merge({
        :ident_type => Proc.new { |x| type.call(x.children[1]) rescue nil },
        :info => Proc.new { 0 },
        :func_info => Proc.new { 0 }
      }),
      combine.merge({
        :ident_types => Proc.new { |v| v.group_by { |y| y[:ident_type] }.map_hash { |x| x.size } }
      }),
      # Reports an identifier assigned a type other than its clearly dominant
      # one (dominant = more than ten times the runner-up, plus one).
      Proc.new { |db, keys, data|
        query = db.where(keys).first
        if query
          types = query.ident_types.select { |k, _v| ["str","int","float","array","hash"].include? k }
          types.default = 0
          first, second = types.sort_by { |_k, v| v * -1 }.take(2)
          # With no recognised type there is nothing to compare against; with
          # only one, the runner-up count is 0.
          if first
            runner_up = second ? second[1] : 0
            if first[1] > 10 * (runner_up + 1)
              { :keys => keys,
                :message =>
                  "The identifier #{keys[:ident]} has appeared #{first[1].to_s} " +
                  "times as #{first[0].to_s}, but only #{types[data[:ident_type]].to_s} " +
                  "times as #{data[:ident_type].to_s}"
              } unless first[0].to_s == data[:ident_type]
            end
          end
        end
      }
    )
  end

  # Returns a copy of +node+ whose first child (the receiver, for a :send
  # node) is replaced by nil.
  def without_caller(node)
    node.updated(nil, node.children.map.with_index do |x, i|
      i == 0 ? nil : x
    end)
  end

  # Normalizes +node+ with a fresh ASTNormalizer and yields the rewritten node
  # together with the normalizer. Returns the block's value.
  def normal_node(node)
    norm = ASTNormalizer.new
    yield norm.rewrite_ast(node), norm
  end

  # Normalized source text of +nodes+, or nil when unparsing raises.
  def normalize_nodes(nodes)
    normal_node(nodes) do |n|
      Unparser.unparse(n) rescue nil
    end
  end

end
213
+
214
# Core extension used by Codex when aggregating identifier types.
class Hash
  # Builds a new plain Hash with the same keys, where each value is replaced
  # by the result of yielding it to the given block.
  def map_hash
    each_with_object({}) do |(key, val), mapped|
      mapped[key] = yield(val)
    end
  end
end
@@ -0,0 +1,91 @@
1
+ require 'parser/current'
2
+ require 'unparser'
3
+ require 'ast'
4
+ require 'mongoid'
5
+ load 'normalize_ast.rb'
6
+
7
# Collects data points for one kind of AST node, groups them by a computed
# key, collapses each group with the configured combine procs and can persist
# the collapsed groups through the aggregate store.
class DataNode

  # db, agg_db - raw and aggregate stores; only agg_db is used by the code
  #              in this class (save! and query).
  # type       - predicate proc deciding whether a node is handled.
  # key_procs  - name => proc(ast, file, project) building the group key.
  # data_procs - name => proc(ast, file, project) building the data point.
  # combine    - name => proc(data_points) collapsing one group.
  # query      - optional proc(agg_db, keys, data_point) used by #query.
  def initialize(db, agg_db, type, key_procs = {}, data_procs = {}, combine = {}, query = nil)
    @db = db
    @agg_db = agg_db
    @type = type
    @key_procs = key_procs
    @data_procs = data_procs
    @combine = combine
    @query = query
    @data = Hash.new { |groups, group_key| groups[group_key] = [] }
    @processed = {}
  end

  # Runs the key and data procs for +ast+ when the type predicate accepts it
  # and yields (keys, data_point) to the block. Returns the block's value, or
  # nil when the node is rejected or no block was given.
  def process_node(ast, file, project, &block)
    return nil unless @type.call(ast)

    keys = @key_procs.each_with_object({}) do |(name, builder), acc|
      acc[name] = builder.call(ast, file, project)
    end
    data_point = @data_procs.each_with_object({}) do |(name, builder), acc|
      acc[name] = builder.call(ast, file, project)
    end

    block ? yield(keys, data_point) : nil
  end

  # Records the data point for +ast+ under its keys and, when a block is
  # given, yields the keys with all data points collected for them so far.
  # Always returns the full keys => data points table.
  def add_ast(ast, file, project, &block)
    process_node(ast, file, project) do |keys, data_point|
      bucket = @data[keys]
      bucket.push(data_point)
      yield keys, bucket if block
    end
    @data
  end

  # Rebuilds and returns the collapsed table: for every group, the combined
  # values merged with the group's keys.
  def process_all_ast
    @processed = {}
    @data.each_pair do |keys, points|
      @processed[keys] = collapse_data(points).merge(keys)
    end
    @processed
  end

  # Evaluates the query proc for +ast+ against the aggregate store. Returns
  # the proc's result when truthy, nil otherwise (including when no query
  # proc was configured or the node is rejected).
  def query(ast, file = "N/A", project = "N/A")
    return nil unless @query

    process_node(ast, file, project) do |keys, data|
      @query.call(@agg_db, keys, data) || nil
    end
  end

  # Collapses all collected data and saves one aggregate record per group,
  # printing a running count. Nothing is saved when there are no key procs.
  # Saving the raw data points to @db is not performed here (that code was
  # commented out in the original). Always returns true.
  def save!
    process_all_ast
    unless @key_procs.empty?
      @processed.each_value.with_index(1) do |record, saved|
        @agg_db.new(record).save
        puts saved
      end
    end
    true
  end

  # Applies every combine proc to +data+ and returns name => combined value.
  def collapse_data(data)
    @combine.each_with_object({}) do |(name, combiner), combined|
      combined[name] = combiner.call(data)
    end
  end

end
91
+
@@ -0,0 +1,107 @@
1
+ require 'parser/current'
2
+ require 'unparser'
3
+ require 'ast'
4
+ require 'pp'
5
+
6
# Hands out numbered placeholder values for identifiers and literals. Within
# one tracker the same original value always receives the same placeholder,
# and placeholders are numbered in order of first appearance per table.
class NameTracker
  def initialize
    @var_hash = numbered { |n| "var#{n}" }
    @spt_hash = numbered { |n| "*vr#{n}" }
    @bar_hash = numbered { |n| "&vr#{n}" }
    @sym_hash = numbered { |n| "sym#{n}".to_sym }
    @str_hash = numbered { |n| "str#{n}" }
    @flt_hash = numbered { |n| n.to_f }
    @int_hash = numbered { |n| n }

    # :arg and :var deliberately point at the same table, so both draw from
    # one sequence of "varN" names.
    @mapping = {
      :str => @str_hash,
      :sym => @sym_hash,
      :arg => @var_hash,
      :float => @flt_hash,
      :int => @int_hash,
      :var => @var_hash,
      :restarg => @spt_hash,
      :blockarg => @bar_hash
    }
  end

  # Returns the placeholder for +id+ in the table registered for +type+,
  # creating it on first use.
  def rename(type, id)
    table = @mapping[type]
    table[id]
  end

  private

  # Hash whose missing keys are filled with namer.call(number of keys stored
  # so far) and remembered.
  def numbered(&namer)
    Hash.new { |seen, original| seen[original] = namer.call(seen.size) }
  end

end
31
+
32
# Rewrites an AST so that concrete variable names, literals and method
# definition names are replaced by placeholders (via NameTracker), while
# recording what was replaced in +complexity+.
class ASTNormalizer
  # type => list of original values recorded for that type.
  attr_accessor :complexity

  def initialize
    @track = NameTracker.new
    @complexity = Hash.new { |h, k| h[k] = [] }
  end

  # Records that +val+ of kind +type+ was encountered.
  def update_complexity(type, val)
    @complexity[type].push(val)
  end

  # Returns type => number of recorded values, restricted to the measured
  # types (:int, :str, :send, :var, :float, :sym).
  def pretty_complexity
    measures = [:int, :str, :send, :var, :float, :sym]
    out = {}
    @complexity.each do |type, values|
      out[type] = values.size if measures.include?(type)
    end
    out
  end

  # Returns a normalized copy of +ast+. Values that are not AST::Node
  # instances are returned unchanged.
  def rewrite_ast(ast)
    return ast unless ast.is_a?(AST::Node)

    type = ast.type
    case type
    # Variable reads. :cvar is handled here so class variables are renamed
    # when read just as they are when assigned (:cvasgn below).
    when :lvar, :ivar, :gvar, :cvar
      update_complexity(:var, ast.children.first)
      ast.updated(nil, ast.children.map { |child| @track.rename(:var, child) })
    # Assignment: rename the target, normalize the assigned expression.
    when :lvasgn, :gvasgn, :ivasgn, :cvasgn
      update_complexity(:assignment, ast.children.first)
      rename_head_rewrite_rest(ast)
    # Primitives and plain arguments: every child is replaced by a placeholder.
    when :int, :float, :str, :sym, :arg, :restarg, :blockarg
      update_complexity(type, ast.children.first)
      ast.updated(nil, ast.children.map { |child| @track.rename(type, child) })
    # Optional argument: rename the name, normalize the default expression.
    when :optarg
      update_complexity(:arg, ast.children.first)
      rename_head_rewrite_rest(ast)
    # Method definitions: the name slot becomes :method.
    when :def
      update_complexity(:def, ast.children.first)
      replace_child_rewrite_rest(ast, 0, :method)
    when :defs
      # For :defs the method name is the second child (the slot replaced by
      # :method below); the first child is the receiver.
      update_complexity(:def, ast.children[1])
      replace_child_rewrite_rest(ast, 1, :method)
    when :send
      update_complexity(:send, ast.children[1])
      rewrite_children(ast)
    else
      rewrite_children(ast)
    end
  end

  private

  # Copy of +ast+ with every child normalized recursively.
  def rewrite_children(ast)
    ast.updated(nil, ast.children.map { |child| rewrite_ast(child) })
  end

  # Copy of +ast+ whose first child is renamed as a variable and whose
  # remaining children are normalized recursively.
  def rename_head_rewrite_rest(ast)
    ast.updated(nil, ast.children.map.with_index { |child, i|
      i == 0 ? @track.rename(:var, child) : rewrite_ast(child)
    })
  end

  # Copy of +ast+ whose child at +index+ is replaced by +replacement+ and
  # whose other children are normalized recursively.
  def replace_child_rewrite_rest(ast, index, replacement)
    ast.updated(nil, ast.children.map.with_index { |child, i|
      i == index ? replacement : rewrite_ast(child)
    })
  end
end
95
+
96
# Walks an AST depth-first, parent before children.
class ASTProcess
  # Yields (node, node.type) for +ast+ and then for every descendant that is
  # a Parser::AST::Node; other values are skipped. Returns nil for a skipped
  # value, otherwise the node's children.
  def store_nodes(ast, &block)
    return unless ast.is_a?(Parser::AST::Node)

    yield ast, ast.type
    ast.children.each { |child| store_nodes(child, &block) }
  end
end
107
+
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-codex
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ethan Fast
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-07-31 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A DSL for collecting and aggregating data on Ruby ASTs
15
+ email: ejhfast@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/normalize_ast.rb
21
+ - lib/data_node.rb
22
+ - lib/codex.rb
23
+ homepage: http://rubygems.org/gems/normalize_ast
24
+ licenses:
25
+ - MIT
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ none: false
32
+ requirements:
33
+ - - ! '>='
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project:
44
+ rubygems_version: 1.8.25
45
+ signing_key:
46
+ specification_version: 3
47
+ summary: Analyze ruby ASTs with Codex
48
+ test_files: []