evoc 3.9.1 → 3.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/evoc/algorithms/closed_rules.rb +49 -56
- data/lib/evoc/analyze.rb +29 -0
- data/lib/evoc/array.rb +45 -33
- data/lib/evoc/evaluate.rb +8 -0
- data/lib/evoc/experiment.rb +56 -35
- data/lib/evoc/recommendation_cache.rb +12 -4
- data/lib/evoc/scenario.rb +70 -3
- data/lib/evoc/version.rb +1 -1
- data/lib/evoc_cli/analyze.rb +11 -0
- data/lib/evoc_cli/experiment.rb +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: debd6763073d247f88a9ba0b80f252b8f31c5a30
|
4
|
+
data.tar.gz: b21e8c6dfd037bc87ec631aff4ff998591347a10
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66d8e086a2992bf24a50b786989e7d4a8a1cb1e170f2d94a97f0f8438860f5f31b8abdea7c301422fa83a020009f909e88e181a8391f6b4341c40737ecf7b0ea
|
7
|
+
data.tar.gz: c0dfb6534e75d4663284610e1c3b3198f3be91491203b4238130e23b53f1da00287f0ec9447005710d30f8fcb3dafb79ff1be286f817b7610ec574c70b8a2d3a
|
@@ -1,59 +1,51 @@
|
|
1
1
|
module Evoc
|
2
2
|
class ClosedRules
|
3
|
-
extend Logging
|
4
|
-
|
5
3
|
def self.closed_rules(tx_store:,query:)
|
6
4
|
# @@store = tx_store
|
7
5
|
# create initial trees, one tree per consequent
|
8
6
|
tree = self.initialize_tree(tx_store,query)
|
9
|
-
|
10
|
-
tree.print_tree(1,nil,lambda {|node,pre|
|
7
|
+
# puts "INIT TREE:"
|
8
|
+
# tree.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}"})
|
11
9
|
closed_rules = Evoc::RuleStore.new(query: query)
|
12
|
-
|
13
|
-
self.extend_nodes(
|
10
|
+
tree.children.each do |consequent|
|
11
|
+
self.extend_nodes(consequent).each do |frequency, closed_sets|
|
14
12
|
closed_sets.each do |closed_set|
|
15
13
|
antecedent = closed_set - consequent.name
|
16
14
|
closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
|
17
15
|
end
|
18
16
|
end
|
19
|
-
|
17
|
+
end
|
20
18
|
return closed_rules
|
21
19
|
end
|
22
20
|
|
23
21
|
private
|
24
22
|
def self.initialize_tree(tx_store, query)
|
25
|
-
rules = Evoc::Algorithm.co_change(tx_store: tx_store, query: query)
|
26
23
|
tree = Tree::TreeNode.new([])
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
24
|
+
# find all items that changed with something in the query
|
25
|
+
query_changed_in = tx_store.transactions_of_list(query)
|
26
|
+
# store all items from the query that have changed with each consequent
|
27
|
+
query_changed_in.each do |tx_id|
|
28
|
+
tx = tx_store.get_tx(id:tx_id,id_type: :index)
|
29
|
+
antecedent = (query & tx.items)
|
30
|
+
consequents = (tx.items - antecedent)
|
31
|
+
if consequents.size != 0
|
32
|
+
consequents.each do |consequent|
|
33
|
+
consequent_key = [consequent.to_s]
|
34
|
+
if tree[consequent_key].nil?
|
35
|
+
# initialize candidates
|
36
|
+
tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
|
37
|
+
end
|
38
|
+
txes_consequent = tree[consequent_key].content
|
39
|
+
antecedent.each do |item|
|
40
|
+
union = [item.to_s,consequent.to_s]
|
41
|
+
if tree[consequent_key][union].nil?
|
42
|
+
txes_union = tx_store.transactions_of(item) & txes_consequent
|
43
|
+
tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
31
48
|
end
|
32
|
-
# # find all items that changed with something in the query
|
33
|
-
# query_changed_in = tx_store.transactions_of_list(query)
|
34
|
-
# # store all items from the query that have changed with each consequent
|
35
|
-
# query_changed_in.each do |tx_id|
|
36
|
-
# tx = tx_store.get_tx(id:tx_id,id_type: :index)
|
37
|
-
# antecedent = (query & tx.items)
|
38
|
-
# consequents = (tx.items - antecedent)
|
39
|
-
# if consequents.size != 0
|
40
|
-
# consequents.each do |consequent|
|
41
|
-
# consequent_key = [consequent.to_s]
|
42
|
-
# if tree[consequent_key].nil?
|
43
|
-
# # initialize candidates
|
44
|
-
# tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
|
45
|
-
# end
|
46
|
-
# txes_consequent = tree[consequent_key].content
|
47
|
-
# antecedent.each do |item|
|
48
|
-
# union = [item.to_s,consequent.to_s]
|
49
|
-
# if tree[consequent_key][union].nil?
|
50
|
-
# txes_union = tx_store.transactions_of(item) & txes_consequent
|
51
|
-
# tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
|
52
|
-
# end
|
53
|
-
# end
|
54
|
-
# end
|
55
|
-
# end
|
56
|
-
# end
|
57
49
|
return(tree)
|
58
50
|
end
|
59
51
|
|
@@ -63,7 +55,7 @@ module Evoc
|
|
63
55
|
a = current_node
|
64
56
|
b = a.next_sibling
|
65
57
|
while(!b.nil?) do
|
66
|
-
|
58
|
+
# print "Checking #{@@store.ints2names(a.name.map(&:to_i))}:{#{a.content}} against #{@@store.ints2names(b.name.map(&:to_i))}:{#{b.content}}"
|
67
59
|
ab = a.name | b.name
|
68
60
|
a_txes = a.content
|
69
61
|
b_txes = b.content
|
@@ -75,53 +67,54 @@ module Evoc
|
|
75
67
|
if ab_txes.size > 0
|
76
68
|
case self.compare(a_txes,b_txes)
|
77
69
|
when 'EQUAL'
|
78
|
-
|
79
|
-
|
80
|
-
|
70
|
+
# puts " EQUAL"
|
71
|
+
# puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
|
72
|
+
# puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
|
81
73
|
temp = b.previous_sibling
|
82
74
|
root.remove!(b)
|
83
75
|
b = temp
|
84
76
|
a.each {|n| n.rename(ab | n.name)}
|
85
77
|
when 'A_IN_B'
|
86
|
-
|
87
|
-
|
78
|
+
# puts " A in B"
|
79
|
+
# puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
|
88
80
|
a.each {|n| n.rename(ab | n.name)}
|
89
81
|
when 'B_IN_A'
|
90
|
-
|
91
|
-
|
92
|
-
|
82
|
+
# puts " B in A"
|
83
|
+
# puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
|
84
|
+
# puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
|
93
85
|
temp = b.previous_sibling
|
94
86
|
root.remove!(b)
|
95
87
|
b = temp
|
96
88
|
a << Tree::TreeNode.new(ab,ab_txes)
|
97
89
|
when 'NOT_EQUAL'
|
98
|
-
|
99
|
-
|
90
|
+
# puts " NOT EQUAL"
|
91
|
+
# puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
|
100
92
|
a << Tree::TreeNode.new(ab,ab_txes)
|
101
93
|
end
|
102
94
|
end
|
103
|
-
|
104
|
-
root.print_tree(1,nil,lambda {|node,pre|
|
95
|
+
# puts "NEW TREE:"
|
96
|
+
# root.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}:#{node.content.size}"})
|
105
97
|
b = b.next_sibling
|
106
|
-
|
107
|
-
|
98
|
+
# puts "A siblings #{a.right_siblings.map(&:name).map {|n| @@store.ints2names(n.map(&:to_i))}}"
|
99
|
+
# puts "A next sibling #{@@store.ints2names(a.next_sibling.name.map(&:to_i))}}"
|
100
|
+
# puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
|
108
101
|
end # siblings.each
|
109
102
|
if !a.children.empty?
|
110
|
-
|
103
|
+
# puts "TRAVERSING DOWN"
|
111
104
|
self.extend_nodes(a, closed_rules: closed_rules)
|
112
105
|
end
|
113
106
|
# add node as closed rule if not subsumed by another rule already added
|
114
107
|
rule_frequency = a.content.size
|
115
108
|
rule = a.name
|
116
109
|
if closed_rules[rule_frequency].nil?
|
117
|
-
|
110
|
+
# puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
|
118
111
|
closed_rules[rule_frequency] = [rule]
|
119
112
|
else
|
120
113
|
if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
|
121
|
-
|
114
|
+
# puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
|
122
115
|
closed_rules[rule_frequency] << rule
|
123
116
|
else
|
124
|
-
|
117
|
+
# puts "RULE SUBSUMED, NOT ADDING: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
|
125
118
|
end
|
126
119
|
end
|
127
120
|
current_node = current_node.next_sibling
|
data/lib/evoc/analyze.rb
CHANGED
@@ -28,6 +28,35 @@ module Evoc
|
|
28
28
|
CSV {|row| row << results}
|
29
29
|
end
|
30
30
|
|
31
|
+
def evolution
|
32
|
+
CSV {|row| row << %w(index relevant_index overlap)}
|
33
|
+
self.tx_store.each do |tx|
|
34
|
+
changed_in = self.tx_store.transactions_of_list(tx.items, strict: false, identifier: :index)
|
35
|
+
previous_txes = changed_in.select {|i| i <= tx.index}
|
36
|
+
previous_txes.each do |prev_index|
|
37
|
+
prev_tx = self.tx_store.get_tx(id: prev_index,id_type: :index)
|
38
|
+
overlap = ((prev_tx.items & tx.items).size/tx.size.to_f).round(2)
|
39
|
+
CSV {|row| row << [tx.index,prev_index,overlap]}
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def commits
|
45
|
+
unique_items = Set.new
|
46
|
+
changes_so_far = 0
|
47
|
+
self.tx_store.each do |tx|
|
48
|
+
data = Hash.new
|
49
|
+
tx.items.each {|item| unique_items << item}
|
50
|
+
changes_so_far = changes_so_far += tx.items.size
|
51
|
+
data['sha'] = tx.id
|
52
|
+
data['index'] = tx.index
|
53
|
+
data['num_changes'] = tx.items.size
|
54
|
+
data['items_touched_so_far'] = unique_items.size
|
55
|
+
data['moving_average'] = (changes_so_far/(tx.index+1)).to_f.round(2)
|
56
|
+
STDOUT.puts data.to_json
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
31
60
|
def avg_method_changes_per_parsable_file
|
32
61
|
parsable_files_changed = 0
|
33
62
|
method_changes = 0
|
data/lib/evoc/array.rb
CHANGED
@@ -1,37 +1,49 @@
|
|
1
1
|
class Array
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
2
|
+
|
3
|
+
def mean
|
4
|
+
self.inject(0) { |sum, x| sum += x } / self.size.to_f
|
5
|
+
end
|
6
|
+
|
7
|
+
def median(already_sorted=false)
|
8
|
+
return nil if self.empty?
|
9
|
+
array = (already_sorted ? self : self.sort)
|
10
|
+
m_pos = array.size / 2
|
11
|
+
return array.size % 2 == 1 ? array[m_pos] : array[m_pos-1..m_pos].mean
|
12
|
+
end
|
13
|
+
|
14
|
+
def subset?(other)
|
15
|
+
self & other == self
|
16
|
+
end
|
17
|
+
|
18
|
+
def include_any?(other)
|
19
|
+
(self & other).size > 0
|
20
|
+
end
|
21
|
+
|
22
|
+
##
|
23
|
+
# returns the union of an array of arrays
|
24
|
+
def array_union
|
25
|
+
if union = self.inject(:|)
|
26
|
+
return union
|
27
|
+
else
|
28
|
+
return []
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
##
|
33
|
+
# returns the intersection of a list of lists
|
34
|
+
def array_intersection
|
35
|
+
if intersection = self.inject(:&)
|
36
|
+
return intersection
|
37
|
+
else
|
38
|
+
return []
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
##
|
43
|
+
# returns the items found in the arrays of self that are not in other
|
44
|
+
def array_difference(other)
|
45
|
+
self.map {|a| a - other}.array_union
|
46
|
+
end
|
35
47
|
|
36
48
|
def self.powerset(set)
|
37
49
|
return [set] if set.empty?
|
data/lib/evoc/evaluate.rb
CHANGED
@@ -11,6 +11,14 @@ module Evoc
|
|
11
11
|
end
|
12
12
|
end
|
13
13
|
|
14
|
+
def self.mean_support(rules:)
|
15
|
+
if rules.empty? then return nil end
|
16
|
+
return (rules.inject(0) {|sum,r| sum + r.m_support.value}/rules.size).to_f
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.mean_support10(rules:)
|
20
|
+
return self.mean_support(rules: Evoc::RuleStore.sort_on(rules: rules,measures: ['m_support']).take(10).flatten.take(10))
|
21
|
+
end
|
14
22
|
|
15
23
|
def self.mean_confidence(rules:)
|
16
24
|
if rules.empty? then return nil end
|
data/lib/evoc/experiment.rb
CHANGED
@@ -17,6 +17,8 @@ module Evoc
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def sample_transactions
|
20
|
+
# initialize a random number generator with a fixed seed
|
21
|
+
rand = Random.new(self.opts[:seed])
|
20
22
|
# by default we can sample from the whole history
|
21
23
|
sampling_history = Evoc::HistoryStore.base_history
|
22
24
|
STDERR.puts "Sampling transactions from a pool of #{sampling_history.size}.."
|
@@ -74,7 +76,7 @@ module Evoc
|
|
74
76
|
tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
|
75
77
|
tx_sizes_to_sample_from.each do |group_size|
|
76
78
|
if group_size == '*'
|
77
|
-
sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size])
|
79
|
+
sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size], random: rand)
|
78
80
|
sample << sampled_ids
|
79
81
|
STDERR.puts "Sampled #{sampled_ids.size} txes"
|
80
82
|
# remove sampled txes from sampling_history
|
@@ -87,7 +89,7 @@ module Evoc
|
|
87
89
|
if group.size < self.opts[:sample_size]
|
88
90
|
logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
|
89
91
|
end
|
90
|
-
sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
|
92
|
+
sampled_ids = group.sample(self.opts[:sample_size], random: rand).map(&:id)
|
91
93
|
sample << sampled_ids
|
92
94
|
STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
|
93
95
|
else
|
@@ -109,6 +111,8 @@ module Evoc
|
|
109
111
|
# tx_id, query
|
110
112
|
#
|
111
113
|
def generate_queries
|
114
|
+
# initialize a random number generator with a fixed seed
|
115
|
+
rand = Random.new(self.opts[:seed])
|
112
116
|
##
|
113
117
|
# write dict
|
114
118
|
##
|
@@ -154,15 +158,15 @@ module Evoc
|
|
154
158
|
specified_sizes.uniq!
|
155
159
|
|
156
160
|
random_sizes = []
|
157
|
-
if self.opts[:random_select] then random_sizes << Random.new.rand(
|
161
|
+
if self.opts[:random_select] then random_sizes << Random.new.rand(self.opts[:minimum_query_size]..(tx_size-1)) end
|
158
162
|
|
159
163
|
sampled_queries = []
|
160
164
|
# only specified sizes
|
161
165
|
if random_sizes.empty? & !specified_sizes.empty?
|
162
|
-
sampled_queries = specified_sizes.map {|s| items.sample(s)}
|
166
|
+
sampled_queries = specified_sizes.map {|s| items.sample(s, random: rand)}
|
163
167
|
# only random sizes
|
164
168
|
elsif !random_sizes.empty? & specified_sizes.empty?
|
165
|
-
sampled_queries = random_sizes.map {|s| items.sample(s)}
|
169
|
+
sampled_queries = random_sizes.map {|s| items.sample(s, random: rand)}
|
166
170
|
# random + specified = randomly sample in range defined by specified
|
167
171
|
# ex:
|
168
172
|
# specified = [1,3,10,20]
|
@@ -172,9 +176,9 @@ module Evoc
|
|
172
176
|
# 2. randomly select X in specified = Y
|
173
177
|
# 3. randomly select Y in tx
|
174
178
|
elsif !random_sizes.empty? & !specified_sizes.empty?
|
175
|
-
specified_sizes.select! {|s| (s < tx_size) & (s
|
176
|
-
if randomly_sampled_size = specified_sizes.sample #2.
|
177
|
-
sampled_queries = [items.sample(randomly_sampled_size)] #3.
|
179
|
+
specified_sizes.select! {|s| (s < tx_size) & (s >= self.opts[:minimum_query_size])} #1.
|
180
|
+
if randomly_sampled_size = specified_sizes.sample(random: rand) #2.
|
181
|
+
sampled_queries = [items.sample(randomly_sampled_size, random: rand)] #3.
|
178
182
|
end
|
179
183
|
end
|
180
184
|
|
@@ -189,6 +193,9 @@ module Evoc
|
|
189
193
|
logger.debug "The size of the sampled query was equal to the size of the transaction, skipping.. Tx ID: #{tx_id}. Query size: #{query.size}"
|
190
194
|
next
|
191
195
|
end
|
196
|
+
if query.size < self.opts[:minimum_query_size]
|
197
|
+
next
|
198
|
+
end
|
192
199
|
CSV {|row| row << [tx_id,query.join(',')]}
|
193
200
|
end
|
194
201
|
else
|
@@ -231,8 +238,6 @@ module Evoc
|
|
231
238
|
factor_max_size = self.opts[:max_size].nil? ? nil : self.opts[:max_size].map {|s| [ 'max_size',s ]}
|
232
239
|
# Factor: Model age aka number of commits between query and last tx in history
|
233
240
|
factor_model_age = self.opts[:model_age].nil? ? nil : self.opts[:model_age].map {|s| [ 'model_age',s ]}
|
234
|
-
# Factor: Algorithm
|
235
|
-
factor_algorithms = self.opts[:algorithms].nil? ? nil : self.opts[:algorithms].map {|a| ['algorithm',a]}
|
236
241
|
# Factor: Measures
|
237
242
|
factor_measures = self.opts[:measures].map {|c| ['measures',c]}
|
238
243
|
# Factor: Aggregator
|
@@ -253,7 +258,8 @@ module Evoc
|
|
253
258
|
num_lines = File.read(self.opts[:queries]).each_line.count-1
|
254
259
|
current_line = 1
|
255
260
|
|
256
|
-
|
261
|
+
# compact removes nil values (not used factors)
|
262
|
+
factors = [factor_model_size,factor_max_size,factor_model_age,factor_measures,factor_permutation,factor_aggregators].compact
|
257
263
|
num_of_scenarios = factors.inject(1) {|product,f| product * f.size}
|
258
264
|
invalid_configuration = 0
|
259
265
|
last_error = 'no errors'
|
@@ -280,38 +286,53 @@ module Evoc
|
|
280
286
|
end
|
281
287
|
|
282
288
|
current_scenario = 1
|
283
|
-
# - compact removes nil values (not used factors)
|
284
289
|
# - the splat operator '*' turns the array into parameters for #product
|
285
290
|
# - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
|
286
291
|
factors.first.product(*factors[1..-1]).each do |scenario|
|
287
|
-
# Print progress to stderr
|
288
|
-
STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
|
289
|
-
if invalid_configuration > 0
|
290
|
-
STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
|
291
|
-
else
|
292
|
-
STDERR.print " \r"
|
293
|
-
end
|
294
|
-
|
295
292
|
params = query_hash.merge(scenario.to_h)
|
296
293
|
params[:case_id] = self.opts[:case_id]
|
297
294
|
params[:granularity] = self.opts[:granularity]
|
298
295
|
# initialize scenario
|
299
296
|
s = Evoc::Scenario.new(params)
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
297
|
+
scenario_stats = {}
|
298
|
+
if self.opts[:stats]
|
299
|
+
scenario_stats = s.stats
|
300
|
+
end
|
301
|
+
# Factor: Algorithm
|
302
|
+
self.opts[:algorithms].each do |algorithm|
|
303
|
+
s.algorithm = algorithm
|
304
|
+
# Print progress to stderr
|
305
|
+
STDERR.print "(#{self.opts[:case_id]}) Executing #{algorithm} on scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
|
306
|
+
if invalid_configuration > 0
|
307
|
+
STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
|
308
|
+
else
|
309
|
+
STDERR.print " \r"
|
310
|
+
end
|
311
|
+
|
312
|
+
begin
|
313
|
+
Evoc::RecommendationCache.get_recommendation(algorithm: algorithm,
|
314
|
+
query: s.query,
|
315
|
+
model_start: s.model_start,
|
316
|
+
model_end: s.model_end,
|
317
|
+
max_size: s.max_size,
|
318
|
+
aggregator: s.aggregator,
|
319
|
+
measures: s.measures)
|
320
|
+
Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators],
|
321
|
+
topk: self.opts[:topk],
|
322
|
+
unique_consequents: self.opts[:unique_consequents],
|
323
|
+
expected_outcome: s.expected_outcome,
|
324
|
+
measure_combination: s.measures)
|
325
|
+
|
326
|
+
# build json line by merging hashes
|
327
|
+
$stdout.puts s.to_h
|
328
|
+
.merge(scenario_stats)
|
329
|
+
.merge({topk: self.opts[:topk], date: tx.date})
|
330
|
+
.merge(Evoc::RecommendationCache.to_h(measures: s.measures))
|
331
|
+
.to_json
|
332
|
+
rescue ArgumentError => e
|
333
|
+
invalid_configuration += 1
|
334
|
+
last_error = e.message
|
335
|
+
end
|
315
336
|
end
|
316
337
|
current_scenario += 1
|
317
338
|
end
|
@@ -12,14 +12,22 @@ module Evoc
|
|
12
12
|
attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
|
13
13
|
end
|
14
14
|
|
15
|
-
def self.recommendation_cached?(algorithm:,
|
15
|
+
def self.recommendation_cached?(algorithm:,
|
16
|
+
query:,
|
17
|
+
model_start:,
|
18
|
+
model_end:,
|
19
|
+
max_size: nil)
|
16
20
|
return self.tag == [algorithm,query,model_start,model_end,max_size].hash
|
17
21
|
end
|
18
22
|
|
19
23
|
|
20
|
-
|
21
|
-
|
22
|
-
|
24
|
+
def self.get_recommendation(algorithm:,
|
25
|
+
query:,
|
26
|
+
model_start:,
|
27
|
+
model_end:,
|
28
|
+
max_size: nil,
|
29
|
+
aggregator: nil,
|
30
|
+
measures: [])
|
23
31
|
# check if a new base recommendation needs to be generated
|
24
32
|
tag = [algorithm,query,model_start,model_end,max_size].hash
|
25
33
|
if self.tag != tag
|
data/lib/evoc/scenario.rb
CHANGED
@@ -16,6 +16,7 @@ module Evoc
|
|
16
16
|
:model_size,
|
17
17
|
:model_age,
|
18
18
|
:max_size,
|
19
|
+
:stats,
|
19
20
|
:opts
|
20
21
|
|
21
22
|
def initialize(opts = Hash.new)
|
@@ -23,7 +24,7 @@ module Evoc
|
|
23
24
|
self.opts = opts
|
24
25
|
self.scenario_id = opts.hash
|
25
26
|
|
26
|
-
|
27
|
+
self.tx_id = opts[:tx_id]
|
27
28
|
self.model_age = opts[:model_age]
|
28
29
|
opts.each do |attribute,value|
|
29
30
|
self.send("#{attribute}=", value)
|
@@ -55,11 +56,17 @@ module Evoc
|
|
55
56
|
|
56
57
|
|
57
58
|
def to_h
|
58
|
-
fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
|
59
|
+
fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures stats)
|
59
60
|
hash = Hash.new
|
60
61
|
fields.each do |key|
|
61
62
|
value = self.method(key).call
|
62
|
-
|
63
|
+
if value.is_a?(Array)
|
64
|
+
hash[key] = value.join(',')
|
65
|
+
elsif value.is_a?(Hash)
|
66
|
+
hash.merge!(value)
|
67
|
+
else
|
68
|
+
hash[key] = value
|
69
|
+
end
|
63
70
|
end
|
64
71
|
return hash
|
65
72
|
end
|
@@ -228,5 +235,65 @@ module Evoc
|
|
228
235
|
def tx_size
|
229
236
|
self.tx.size
|
230
237
|
end
|
238
|
+
|
239
|
+
def stats
|
240
|
+
time_start = Time.now
|
241
|
+
history = Evoc::HistoryStore.get_history(self.model_start,self.model_end,self.max_size)
|
242
|
+
relevant_transactions = Set.new
|
243
|
+
relevant_items = Set.new
|
244
|
+
relevant_ages = []
|
245
|
+
avg_age_of_relevant_transactions = 0
|
246
|
+
avg_size_of_relevant_transactions = 0
|
247
|
+
files_changed = Set.new
|
248
|
+
num_methods_changed = 0
|
249
|
+
num_new_items = 0
|
250
|
+
# @avg_size_of_relevant_transactions = 0
|
251
|
+
# mean_age_of_relevant
|
252
|
+
# median_age_of_relevant
|
253
|
+
# ratio_new_items
|
254
|
+
self.query.each do |item|
|
255
|
+
if change = history.int_2_name[item]
|
256
|
+
change = change.split(':')
|
257
|
+
files_changed << change[0]
|
258
|
+
if change.size > 1
|
259
|
+
num_methods_changed = num_methods_changed + 1
|
260
|
+
end
|
261
|
+
indexes_of_previous_changes = history.transactions_of(item, identifier: :index)
|
262
|
+
if new_item = (indexes_of_previous_changes.size == 0)
|
263
|
+
num_new_items = num_new_items + 1
|
264
|
+
else
|
265
|
+
indexes_of_previous_changes.each do |tx_index|
|
266
|
+
relevant_so_far = relevant_transactions.size
|
267
|
+
relevant_transactions << tx_index
|
268
|
+
new_relevant = (relevant_transactions.size > relevant_so_far)
|
269
|
+
if new_relevant
|
270
|
+
age = (self.tx_index - tx_index)
|
271
|
+
relevant_ages << age
|
272
|
+
tx = history.get_tx(id: tx_index,id_type: :index)
|
273
|
+
avg_size_of_relevant_transactions = avg_size_of_relevant_transactions + tx.size
|
274
|
+
avg_age_of_relevant_transactions = avg_age_of_relevant_transactions + age
|
275
|
+
end
|
276
|
+
end
|
277
|
+
end
|
278
|
+
else
|
279
|
+
num_new_items = num_new_items + 1
|
280
|
+
end
|
281
|
+
end
|
282
|
+
|
283
|
+
num_relevant_transactions = relevant_transactions.size
|
284
|
+
|
285
|
+
time_end = Time.now
|
286
|
+
time_generate_stats = TimeDifference.between(time_start,time_end).in_seconds.round(8)
|
287
|
+
{time_generate_stats: time_generate_stats,
|
288
|
+
num_files_changed: files_changed.size,
|
289
|
+
num_methods_changed: num_methods_changed,
|
290
|
+
num_new_items: num_new_items,
|
291
|
+
num_relevant_transactions: num_relevant_transactions,
|
292
|
+
median_age_of_relevant_transactions: relevant_ages.median,
|
293
|
+
avg_age_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_age_of_relevant_transactions/num_relevant_transactions).to_f,
|
294
|
+
avg_size_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_size_of_relevant_transactions/num_relevant_transactions).to_f}
|
295
|
+
end
|
296
|
+
|
297
|
+
|
231
298
|
end
|
232
299
|
end
|
data/lib/evoc/version.rb
CHANGED
data/lib/evoc_cli/analyze.rb
CHANGED
@@ -6,6 +6,17 @@ module EvocCLI
|
|
6
6
|
class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
|
7
7
|
class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
|
8
8
|
|
9
|
+
desc "evolution","Outputs where the items of all transactions previously changed."
|
10
|
+
def evolution
|
11
|
+
a = Evoc::Analyze.new(options)
|
12
|
+
a.evolution
|
13
|
+
end
|
14
|
+
|
15
|
+
desc "commits","Outputs the items touched so far and the moving average of items per transactions for every transaction in the history"
|
16
|
+
def commits
|
17
|
+
a = Evoc::Analyze.new(options)
|
18
|
+
a.commits
|
19
|
+
end
|
9
20
|
|
10
21
|
method_option :number, aliases: '-n', type: :numeric, default: 1000, desc: "The number of rules to calculate measures for"
|
11
22
|
desc "measure_values","Empirically investigate the range of interestingness measures"
|
data/lib/evoc_cli/experiment.rb
CHANGED
@@ -19,6 +19,7 @@ module EvocCLI
|
|
19
19
|
|
20
20
|
##
|
21
21
|
# sample_transactions
|
22
|
+
method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
|
22
23
|
method_option :sample_groups, aliases: "-g", type: :array, required: true, desc: "What tx size groups to sample from"
|
23
24
|
method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
|
24
25
|
method_option :recent, type: :numeric, desc: "Filter to the X most recent transactions"
|
@@ -38,6 +39,7 @@ module EvocCLI
|
|
38
39
|
##
|
39
40
|
# generate_queries
|
40
41
|
#
|
42
|
+
method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
|
41
43
|
method_option :transaction_ids_path, :aliases => '-i', :type => :string, :required => true, :desc => "Path to file with \\n separated list of transaction ids"
|
42
44
|
method_option :random_select, type: :boolean, default: false, desc: "Randomly select a query from each given transaction"
|
43
45
|
method_option :select, aliases: '-s', type: :array, default: [],
|
@@ -46,6 +48,7 @@ module EvocCLI
|
|
46
48
|
desc: "Reverse version of --select (select \"all but\" X)"
|
47
49
|
method_option :percentage, aliases: '-e', type: :array,
|
48
50
|
desc: "Percentage of items to select for each query"
|
51
|
+
method_option :minimum_query_size, type: :numeric, default: 1, desc: "Only sample queries of at least this size"
|
49
52
|
method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
|
50
53
|
method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
|
51
54
|
method_option :write_dict, type: :string, desc: "Write an item dictionary to the provided file"
|
@@ -75,6 +78,7 @@ module EvocCLI
|
|
75
78
|
method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
|
76
79
|
method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
|
77
80
|
method_option :topk, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER any consequent filter"
|
81
|
+
method_option :stats, type: :boolean, required: false, desc: "Generate extra stats describing each scenario"
|
78
82
|
desc "execute_scenarios [options]",""
|
79
83
|
def execute_scenarios
|
80
84
|
if !options[:permutation].nil?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: evoc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.9.1
|
4
|
+
version: 3.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Rolfsnes
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|