ruby-codex 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/codex.rb +222 -0
- data/lib/data_node.rb +91 -0
- data/lib/normalize_ast.rb +107 -0
- metadata +48 -0
data/lib/codex.rb
ADDED
@@ -0,0 +1,222 @@
|
|
1
|
+
load 'data_node.rb'
|
2
|
+
|
3
|
+
# Codex wires up a set of DataNode collectors/queries over Ruby ASTs:
# blocks, function calls, call chains, conditionals, and identifier
# assignments. Each DataNode gets key/data/combine procs plus an
# optional "unlikely code" query against the aggregate db.
#
# NOTE: the procs below are deliberately Proc.new (not lambdas) —
# DataNode invokes key/data procs with (node, file, project) and
# combine procs with one argument, and several procs here declare
# fewer parameters, relying on proc (lenient) arity.
class Codex

  attr_accessor :block, :func, :func_chain, :cond, :ident

  # db     - handle for raw per-node records
  # agg_db - handle for aggregated records (queried by the query procs)
  def initialize(db, agg_db)

    # helper procs

    # Total complexity score of a normalized node (sum of all measured
    # category counts). reduce(0, :+) so an empty tally yields 0, not nil.
    info = Proc.new do |node|
      normal_node(node) do |n, norm|
        norm.pretty_complexity.map { |k, v| v }.reduce(0, :+)
      end
    end
    # Number of method-call (:send) nodes in the normalized node.
    func_info = Proc.new do |node|
      normal_node(node) do |n, norm|
        norm.pretty_complexity[:send] || 0
      end
    end
    # Node "type" as a string: AST node type, or the Ruby class name
    # for non-node children (nil, Symbol, ...).
    type = Proc.new { |node| node.is_a?(AST::Node) ? node.type.to_s : node.class.to_s }
    # Human-readable call name, qualified by a constant receiver when
    # present ("Const.meth"), otherwise just the method name.
    func_name = Proc.new do |node|
      if type.call(node.children.first) == "const"
        node.children.first.children[1].to_s + "." + node.children[1].to_s
      else
        node.children[1].to_s
      end
    end

    key = {
      :type => type
    }

    # Per-node fields shared by every DataNode.
    data_core = {
      :file => Proc.new { |node, f, p| f },
      :project => Proc.new { |node, f, p| p },
      :line => Proc.new { |node, f, p| node.loc ? node.loc.line : nil },
      :info => info,
      :func_info => func_info,
      :orig_code => Proc.new { |node, f, p| Unparser.unparse(node) rescue nil },
    }

    # Group-level aggregations shared by every DataNode.
    combine = {
      :files => Proc.new { |v| v.map { |x| x[:file] }.uniq },
      :file_count => Proc.new { |v| v.map { |x| x[:file] }.uniq.count },
      :projects => Proc.new { |v| v.map { |x| x[:project] }.uniq },
      :project_count => Proc.new { |v| v.map { |x| x[:project] }.uniq.count },
      # Keep up to 10 sampled examples of the original source.
      :orig_code => Proc.new { |v| v.sample(10).map do |x|
          { :code => x[:orig_code], :file => x[:file], :line => x[:line] }
        end.uniq },
      :count => Proc.new { |v| v.count },
      :info => Proc.new { |v| v.first[:info] }, # some process may overwrite
      :func_info => Proc.new { |v| v.first[:func_info] }
    }

    # Blocks: grouped by receiving function and (normalized) return value.
    @block = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :block },
      key.merge({
        :func => Proc.new { |x| func_name.call(x.children.first) },
        :body => Proc.new { |x| normalize_nodes(x.children.last) },
        :arg_size => Proc.new { |x| x.children[1].children.size },
        # Type (or call name) of the block's final expression.
        :ret_val => Proc.new do |x|
          body = x.children.last
          ret = type.call(body) == "begin" ? body.children.last : body
          typ = type.call(ret)
          typ == "send" ? func_name.call(ret) : typ
        end,
        # Normalized source with the call's receiver stripped.
        :norm_code => Proc.new { |x|
          normal_node(x) do |n|
            Unparser.unparse(n.updated(nil, n.children.map.with_index do |y, i|
              i == 0 ? without_caller(y) : y
            end)) rescue nil
          end }
      }),
      data_core.merge({
        :args => Proc.new { |x| x.children[1].children.map { |y| y.children[0].to_s } }
      }),
      combine.merge({
        :args_list => Proc.new { |v| v.map { |x| x[:args] } }
      }),
      # Flag func/ret_val pairings never (or rarely) seen together even
      # though both the function and the return type are common.
      Proc.new { |agg, keys, vals|
        query = agg.where(:type => keys[:type], :func => keys[:func], :ret_val => keys[:ret_val]).first
        query_count = query.nil? ? 0 : query.count
        blocks = agg.where(:type => keys[:type], :func => keys[:func]).sum(:count)
        rets = agg.where(:type => keys[:type], :ret_val => keys[:ret_val]).sum(:count)
        if query_count <= 0
          { :keys => keys,
            :message =>
              "We've seen #{keys[:func]} blocks returning #{keys[:ret_val]} only #{query_count.to_s} " +
              "times, though we've seen #{keys[:func]} blocks #{blocks.to_s} times and #{keys[:ret_val]} " +
              "returned #{rets.to_s} times."
          } if blocks > 0 && rets > 0
        end
      }
    )

    # Function calls: grouped by name, normalized form, and signature.
    @func = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :send },
      key.merge({
        :func => Proc.new { |x| func_name.call(x) },
        :norm_code => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(without_caller(n)) rescue nil } },
        # Argument "type" signature (children after receiver + name).
        :sig => Proc.new { |x| x.children.drop(2).map { |y| type.call(y) } }
      }),
      data_core,
      combine,
      # Flag rare call forms when a much more common form of the same
      # function exists (10x threshold).
      Proc.new { |agg, keys, values|
        query = agg.where(keys).first
        query_count = query.nil? ? 0 : query.count
        func = agg.where(:type => keys[:type], :func => keys[:func]).sort(:count => -1).limit(1).first
        alt_count = func.nil? ? 0 : func.count
        { :keys => keys,
          :message =>
            "Function call #{keys[:norm_code]} has appeared #{query_count.to_s} times, but " +
            "#{func.norm_code} has appeared #{alt_count.to_s} times."
        } if alt_count > 10 * (query_count + 1)
      }
    )

    # Chained calls (f2.f1): grouped by the (f1, f2) pair.
    @func_chain = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :send && type.call(x.children.first) == "send" },
      key.merge({
        :type => Proc.new { "func_chain" },
        :f1 => Proc.new { |x| func_name.call(x) },
        :f2 => Proc.new { |x| func_name.call(x.children.first) }
      }),
      data_core.merge({
        :info => Proc.new { 0 },
        :func_info => Proc.new { 0 }
      }),
      combine,
      # Flag chains of two individually-common functions never seen
      # chained together before.
      Proc.new do |agg, keys, data|
        query = agg.where(keys).first
        if query.nil? || query.count <= 0
          fs = [:f1, :f2].map { |f| agg.where(:type => "send", :func => keys[f]).size }
          { :keys => keys,
            :message =>
              "Function #{keys[:f1]} has appeared #{fs[0].to_s} times " +
              "and #{keys[:f2]} has appeared #{fs[1].to_s} times, but " +
              "they haven't appeared together."
          } unless fs[0] <= 0 || fs[1] <= 0
        end
      end
    )

    # Conditionals: normalized condition and both branches. No query.
    @cond = DataNode.new(
      db, agg_db,
      Proc.new { |x| x.type == :if },
      key.merge({
        :norm_code => Proc.new { |x| normalize_nodes(x) },
        :cond => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children.first) } },
        :iftrue => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children[1]) } },
        :iffalse => Proc.new { |x| normal_node(x) { |n| Unparser.unparse(n.children[2]) } },
      }),
      data_core,
      combine
    )

    # Identifier assignments: grouped by identifier name; tracks the
    # distribution of assigned literal types.
    @ident = DataNode.new(
      db, agg_db,
      Proc.new { |x| [:lvasgn, :ivasgn, :cvasgn, :gvasgn].include?(x.type) },
      key.merge({
        :type => Proc.new { "ident" },
        :ident => Proc.new { |x| x.children.first.to_s },
      }),
      data_core.merge({
        :ident_type => Proc.new { |x| type.call(x.children[1]) rescue nil },
        :info => Proc.new { 0 },
        :func_info => Proc.new { 0 }
      }),
      combine.merge({
        :ident_types => Proc.new { |v| v.group_by { |y| y[:ident_type] }.map_hash { |x| x.size } }
      }),
      # Flag assignments whose literal type disagrees with the type this
      # identifier overwhelmingly takes elsewhere.
      Proc.new { |agg, keys, data|
        query = agg.where(keys).first
        if query
          types = query.ident_types.select { |k, v| ["str", "int", "float", "array", "hash"].include? k }
          types.default = 0
          first, second = types.sort_by { |k, v| v * -1 }.take(2)
          # Need at least two recorded types to call one "dominant";
          # indexing `second` unconditionally crashed on sparse data.
          if first && second && first[1] > 10 * (second[1] + 1)
            { :keys => keys,
              :message =>
                "The identifier #{keys[:ident]} has appeared #{first[1].to_s} " +
                "times as #{first[0].to_s}, but only #{types[data[:ident_type]].to_s} " +
                "times as #{data[:ident_type].to_s}"
            } unless first[0].to_s == data[:ident_type]
          end
        end
      }
    )
  end

  # Returns a copy of +node+ with its receiver (child 0) removed, so
  # `a.foo(1)` normalizes the same as `b.foo(1)`.
  def without_caller(node)
    node.updated(nil, node.children.map.with_index do |x, i|
      i == 0 ? nil : x
    end)
  end

  # Normalizes +node+ with a fresh ASTNormalizer and yields both the
  # rewritten node and the normalizer (for its complexity tallies).
  def normal_node(node)
    norm = ASTNormalizer.new
    yield norm.rewrite_ast(node), norm
  end

  # Normalized source text for +nodes+ (nil if it cannot be unparsed).
  def normalize_nodes(nodes)
    normal_node(nodes) do |n|
      Unparser.unparse(n) rescue nil
    end
  end

end
|
213
|
+
|
214
|
+
# Core-class extension used by Codex's aggregation procs (e.g. turning
# grouped lists into their sizes).
class Hash
  # Returns a new hash with the same keys, each value replaced by the
  # block's result for the original value.
  # NOTE: monkey patch on a core class; Hash#transform_values (Ruby
  # 2.4+) provides the same behavior under a standard name.
  def map_hash
    each_with_object({}) do |(key, val), mapped|
      mapped[key] = yield val
    end
  end
end
|
data/lib/data_node.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'parser/current'
|
2
|
+
require 'unparser'
|
3
|
+
require 'ast'
|
4
|
+
require 'mongoid'
|
5
|
+
load 'normalize_ast.rb'
|
6
|
+
|
7
|
+
# DataNode collects data about AST nodes matching a predicate, groups
# the data points by a computed key hash, folds each group into an
# aggregate, and can persist or query aggregates through Mongoid-style
# db handles.
class DataNode

  # db         - handle for raw per-node records (persisting them is
  #              currently commented out in save!)
  # agg_db     - handle for aggregated records
  # type       - Proc(node) -> boolean; selects relevant AST nodes
  # key_procs  - {name => Proc(node, file, project)} building the
  #              grouping key
  # data_procs - {name => Proc(node, file, project)} building one data
  #              point per matching node
  # combine    - {name => Proc(data_points)} folding a group into
  #              aggregate fields
  # query      - optional Proc(agg_db, keys, data) producing an
  #              "unlikely code" report (or nil/false)
  def initialize(db, agg_db, type, key_procs = {}, data_procs = {}, combine = {}, query = nil)
    @type = type
    # grouping-key hash => list of data points seen under that key
    @data = Hash.new { |h, k| h[k] = [] }
    @combine = combine
    @processed = {}
    @key_procs = key_procs
    @data_procs = data_procs
    @db, @agg_db = db, agg_db
    @query = query
  end

  # If +ast+ matches the predicate, computes its keys and data point
  # and yields them; returns nil (without yielding) otherwise.
  def process_node(ast, file, project, &block)
    if @type.call(ast)
      keys = {}
      data_point = {}
      @key_procs.each do |name, proc|
        keys[name] = proc.call(ast, file, project)
      end
      @data_procs.each do |name, proc|
        data_point[name] = proc.call(ast, file, project)
      end
      yield keys, data_point if block
    end
  end

  # Records +ast+'s data point under its grouping key (when it
  # matches), yielding the key and the accumulated group to the
  # optional block. Returns the full accumulated data hash.
  def add_ast(ast, file, project, &block)
    process_node(ast, file, project) do |keys, data_point|
      @data[keys].push(data_point)
      yield keys, @data[keys] if block
    end
    @data
  end

  # Folds every accumulated group through the combine procs, merging
  # the grouping keys into each aggregate. Returns {keys => aggregate}.
  def process_all_ast
    @processed = {}
    @data.each do |k, v|
      @processed[k] = collapse_data(v).merge(k)
    end
    @processed
  end

  # Runs the configured query proc against the aggregate db for +ast+,
  # returning its report (nil when falsy, when +ast+ does not match, or
  # when no query proc was configured).
  def query(ast, file = "N/A", project = "N/A")
    if @query
      process_node(ast, file, project) do |keys, data|
        @query.call(@agg_db, keys, data) || nil
      end
    end
  end

  # Aggregates all collected data and persists one record per group to
  # the aggregate db. Returns true.
  def save!
    process_all_ast
    #@db.delete_all
    #@agg_db.delete_all
    if !@key_procs.empty?
      @processed.each do |k, v|
        @agg_db.new(v).save
      end
    end
    # Persisting raw per-node records is disabled for now:
    # @data.each do |k,v|
    #   v.each do |v_a|
    #     @db.new(v_a.merge(k)).save
    #   end
    # end
    true
  end

  # Applies each combine proc to a group's data points, producing the
  # aggregate field hash for that group.
  def collapse_data(data)
    combined = {}
    @combine.each do |k, proc|
      combined[k] = proc.call(data)
    end
    combined
  end

end
|
91
|
+
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'parser/current'
|
2
|
+
require 'unparser'
|
3
|
+
require 'ast'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
# Maps arbitrary identifiers and literal values to canonical
# placeholders so structurally identical code normalizes to the same
# AST. Each category keeps its own counter, and a given original value
# always maps to the same placeholder within one tracker instance.
class NameTracker
  def initialize
    # :var and :arg deliberately share one namespace, so argument names
    # and variable reads of the same identifier normalize consistently.
    vars = numbered { |i| "var" + i.to_s }
    @mapping = {
      :str      => numbered { |i| "str" + i.to_s },
      :sym      => numbered { |i| ("sym" + i.to_s).to_sym },
      :arg      => vars,
      :float    => numbered { |i| i.to_f },
      :int      => numbered { |i| i },
      :var      => vars,
      :restarg  => numbered { |i| "*vr" + i.to_s },
      :blockarg => numbered { |i| "&vr" + i.to_s }
    }
  end

  # Returns the stable placeholder for +id+ within category +type+.
  def rename(type, id)
    @mapping[type][id]
  end

  private

  # Builds a hash that lazily assigns the next placeholder (derived
  # from the hash's current size) the first time a key is seen.
  def numbered
    Hash.new { |h, k| h[k] = yield(h.size) }
  end
end
|
31
|
+
|
32
|
+
# Rewrites a parsed Ruby AST into a normalized form: variable names,
# literals, and method-definition names are replaced with canonical
# placeholders (via NameTracker), so structurally identical code
# compares equal. Also tallies how often each node category appears.
class ASTNormalizer
  attr_accessor :complexity
  def initialize
    @track = NameTracker.new
    # category symbol => list of the raw values seen for that category
    @complexity = Hash.new { |h,k| h[k] = [] }
  end
  # Records one occurrence of +val+ under the +type+ category.
  def update_complexity(type,val)
    @complexity[type].push(val)
  end
  # Returns {category => occurrence count} restricted to a fixed set of
  # measured categories; used as a rough complexity score.
  def pretty_complexity
    measures, out = [:int, :str, :send, :var, :float, :sym], {}
    @complexity.select { |k,v| measures.include?(k) }.each { |k,v| out[k] = v.size }
    out
  end
  # Recursively rewrites +ast+, returning an updated node tree with
  # placeholder names. Non-AST::Node values are returned unchanged.
  def rewrite_ast(ast)
    if ast.is_a? AST::Node
      type = ast.type
      case type
      # Variables
      when :lvar, :ivar, :gvar
        # NOTE(review): :cvar (class-variable read) is not handled here
        # even though :cvasgn is handled below — confirm intentional.
        update_complexity(:var, ast.children.first)
        ast.updated(nil, ast.children.map { |child| @track.rename(:var, child) })
      # Assignment
      when :lvasgn, :gvasgn, :ivasgn, :cvasgn
        update_complexity(:assignment, ast.children.first)
        # Child 0 is the variable name; the rest is the assigned value.
        ast.updated(nil, ast.children.map.with_index { |child,i|
          i == 0 ? @track.rename(:var,child) : rewrite_ast(child)
        })
      # Primitives
      when :int, :float, :str, :sym, :arg, :restarg, :blockarg
        update_complexity(type, ast.children.first)
        ast.updated(nil, ast.children.map { |child| @track.rename(type, child) })
      when :optarg
        # Optional argument: child 0 is the name, child 1 the default.
        update_complexity(:arg, ast.children.first)
        ast.updated(nil, ast.children.map.with_index { |child,i|
          if i == 0
            @track.rename(:var, child)
          else
            rewrite_ast(child)
          end
        })
      # Method definitions
      when :def
        # Child 0 is the method name; all names collapse to :method.
        update_complexity(:def, ast.children.first)
        ast.updated(nil, ast.children.map.with_index { |child,i|
          i == 0 ? :method : rewrite_ast(child)
        })
      when :defs
        # Singleton def (def self.x): the name is child 1 (child 0 is
        # the receiver), so the collapsed index differs from :def.
        update_complexity(:def, ast.children.first)
        ast.updated(nil, ast.children.map.with_index { |child,i|
          i == 1 ? :method : rewrite_ast(child)
        })
      when :send
        # Method call: tally the method name (child 1) but keep it —
        # call names are significant for normalization.
        update_complexity(:send, ast.children[1])
        ast.updated(nil, ast.children.map { |child| rewrite_ast(child) })
      else
        # Any other node type: recurse into children unchanged.
        ast.updated(nil, ast.children.map { |child| rewrite_ast(child) })
      end
    else
      ast
    end
  end
end
|
95
|
+
|
96
|
+
# Depth-first traversal helper for parser ASTs.
class ASTProcess
  # Yields every Parser::AST::Node in the tree (pre-order) together
  # with its type; non-node children (literals, symbols, nil) are
  # skipped without being yielded.
  def store_nodes(ast, &block)
    return unless ast.is_a?(Parser::AST::Node)
    yield ast, ast.type
    ast.children.each { |child| store_nodes(child, &block) }
  end
end
|
107
|
+
|
metadata
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ruby-codex
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Ethan Fast
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-07-31 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: A DSL for collecting and aggregating data on Ruby ASTs
|
15
|
+
email: ejhfast@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/normalize_ast.rb
|
21
|
+
- lib/data_node.rb
|
22
|
+
- lib/codex.rb
|
23
|
+
homepage: http://rubygems.org/gems/normalize_ast
|
24
|
+
licenses:
|
25
|
+
- MIT
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ! '>='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ! '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
requirements: []
|
43
|
+
rubyforge_project:
|
44
|
+
rubygems_version: 1.8.25
|
45
|
+
signing_key:
|
46
|
+
specification_version: 3
|
47
|
+
summary: Analyze ruby ASTs with Codex
|
48
|
+
test_files: []
|