evoc 3.9.1 → 3.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/evoc/algorithms/closed_rules.rb +49 -56
- data/lib/evoc/analyze.rb +29 -0
- data/lib/evoc/array.rb +45 -33
- data/lib/evoc/evaluate.rb +8 -0
- data/lib/evoc/experiment.rb +56 -35
- data/lib/evoc/recommendation_cache.rb +12 -4
- data/lib/evoc/scenario.rb +70 -3
- data/lib/evoc/version.rb +1 -1
- data/lib/evoc_cli/analyze.rb +11 -0
- data/lib/evoc_cli/experiment.rb +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: debd6763073d247f88a9ba0b80f252b8f31c5a30
|
4
|
+
data.tar.gz: b21e8c6dfd037bc87ec631aff4ff998591347a10
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66d8e086a2992bf24a50b786989e7d4a8a1cb1e170f2d94a97f0f8438860f5f31b8abdea7c301422fa83a020009f909e88e181a8391f6b4341c40737ecf7b0ea
|
7
|
+
data.tar.gz: c0dfb6534e75d4663284610e1c3b3198f3be91491203b4238130e23b53f1da00287f0ec9447005710d30f8fcb3dafb79ff1be286f817b7610ec574c70b8a2d3a
|
@@ -1,59 +1,51 @@
|
|
1
1
|
module Evoc
|
2
2
|
class ClosedRules
|
3
|
-
extend Logging
|
4
|
-
|
5
3
|
def self.closed_rules(tx_store:,query:)
|
6
4
|
# @@store = tx_store
|
7
5
|
# create initial trees, one tree per consequent
|
8
6
|
tree = self.initialize_tree(tx_store,query)
|
9
|
-
|
10
|
-
tree.print_tree(1,nil,lambda {|node,pre|
|
7
|
+
# puts "INIT TREE:"
|
8
|
+
# tree.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}"})
|
11
9
|
closed_rules = Evoc::RuleStore.new(query: query)
|
12
|
-
|
13
|
-
self.extend_nodes(
|
10
|
+
tree.children.each do |consequent|
|
11
|
+
self.extend_nodes(consequent).each do |frequency, closed_sets|
|
14
12
|
closed_sets.each do |closed_set|
|
15
13
|
antecedent = closed_set - consequent.name
|
16
14
|
closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
|
17
15
|
end
|
18
16
|
end
|
19
|
-
|
17
|
+
end
|
20
18
|
return closed_rules
|
21
19
|
end
|
22
20
|
|
23
21
|
private
|
24
22
|
def self.initialize_tree(tx_store, query)
|
25
|
-
rules = Evoc::Algorithm.co_change(tx_store: tx_store, query: query)
|
26
23
|
tree = Tree::TreeNode.new([])
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
24
|
+
# find all items that changed with something in the query
|
25
|
+
query_changed_in = tx_store.transactions_of_list(query)
|
26
|
+
# store all items from the query that have changed with each consequent
|
27
|
+
query_changed_in.each do |tx_id|
|
28
|
+
tx = tx_store.get_tx(id:tx_id,id_type: :index)
|
29
|
+
antecedent = (query & tx.items)
|
30
|
+
consequents = (tx.items - antecedent)
|
31
|
+
if consequents.size != 0
|
32
|
+
consequents.each do |consequent|
|
33
|
+
consequent_key = [consequent.to_s]
|
34
|
+
if tree[consequent_key].nil?
|
35
|
+
# initialize candidates
|
36
|
+
tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
|
37
|
+
end
|
38
|
+
txes_consequent = tree[consequent_key].content
|
39
|
+
antecedent.each do |item|
|
40
|
+
union = [item.to_s,consequent.to_s]
|
41
|
+
if tree[consequent_key][union].nil?
|
42
|
+
txes_union = tx_store.transactions_of(item) & txes_consequent
|
43
|
+
tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
31
48
|
end
|
32
|
-
# # find all items that changed with something in the query
|
33
|
-
# query_changed_in = tx_store.transactions_of_list(query)
|
34
|
-
# # store all items from the query that have changed with each consequent
|
35
|
-
# query_changed_in.each do |tx_id|
|
36
|
-
# tx = tx_store.get_tx(id:tx_id,id_type: :index)
|
37
|
-
# antecedent = (query & tx.items)
|
38
|
-
# consequents = (tx.items - antecedent)
|
39
|
-
# if consequents.size != 0
|
40
|
-
# consequents.each do |consequent|
|
41
|
-
# consequent_key = [consequent.to_s]
|
42
|
-
# if tree[consequent_key].nil?
|
43
|
-
# # initialize candidates
|
44
|
-
# tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
|
45
|
-
# end
|
46
|
-
# txes_consequent = tree[consequent_key].content
|
47
|
-
# antecedent.each do |item|
|
48
|
-
# union = [item.to_s,consequent.to_s]
|
49
|
-
# if tree[consequent_key][union].nil?
|
50
|
-
# txes_union = tx_store.transactions_of(item) & txes_consequent
|
51
|
-
# tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
|
52
|
-
# end
|
53
|
-
# end
|
54
|
-
# end
|
55
|
-
# end
|
56
|
-
# end
|
57
49
|
return(tree)
|
58
50
|
end
|
59
51
|
|
@@ -63,7 +55,7 @@ module Evoc
|
|
63
55
|
a = current_node
|
64
56
|
b = a.next_sibling
|
65
57
|
while(!b.nil?) do
|
66
|
-
|
58
|
+
# print "Checking #{@@store.ints2names(a.name.map(&:to_i))}:{#{a.content}} against #{@@store.ints2names(b.name.map(&:to_i))}:{#{b.content}}"
|
67
59
|
ab = a.name | b.name
|
68
60
|
a_txes = a.content
|
69
61
|
b_txes = b.content
|
@@ -75,53 +67,54 @@ module Evoc
|
|
75
67
|
if ab_txes.size > 0
|
76
68
|
case self.compare(a_txes,b_txes)
|
77
69
|
when 'EQUAL'
|
78
|
-
|
79
|
-
|
80
|
-
|
70
|
+
# puts " EQUAL"
|
71
|
+
# puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
|
72
|
+
# puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
|
81
73
|
temp = b.previous_sibling
|
82
74
|
root.remove!(b)
|
83
75
|
b = temp
|
84
76
|
a.each {|n| n.rename(ab | n.name)}
|
85
77
|
when 'A_IN_B'
|
86
|
-
|
87
|
-
|
78
|
+
# puts " A in B"
|
79
|
+
# puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
|
88
80
|
a.each {|n| n.rename(ab | n.name)}
|
89
81
|
when 'B_IN_A'
|
90
|
-
|
91
|
-
|
92
|
-
|
82
|
+
# puts " B in A"
|
83
|
+
# puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
|
84
|
+
# puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
|
93
85
|
temp = b.previous_sibling
|
94
86
|
root.remove!(b)
|
95
87
|
b = temp
|
96
88
|
a << Tree::TreeNode.new(ab,ab_txes)
|
97
89
|
when 'NOT_EQUAL'
|
98
|
-
|
99
|
-
|
90
|
+
# puts " NOT EQUAL"
|
91
|
+
# puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
|
100
92
|
a << Tree::TreeNode.new(ab,ab_txes)
|
101
93
|
end
|
102
94
|
end
|
103
|
-
|
104
|
-
root.print_tree(1,nil,lambda {|node,pre|
|
95
|
+
# puts "NEW TREE:"
|
96
|
+
# root.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}:#{node.content.size}"})
|
105
97
|
b = b.next_sibling
|
106
|
-
|
107
|
-
|
98
|
+
# puts "A siblings #{a.right_siblings.map(&:name).map {|n| @@store.ints2names(n.map(&:to_i))}}"
|
99
|
+
# puts "A next sibling #{@@store.ints2names(a.next_sibling.name.map(&:to_i))}}"
|
100
|
+
# puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
|
108
101
|
end # siblings.each
|
109
102
|
if !a.children.empty?
|
110
|
-
|
103
|
+
# puts "TRAVERSING DOWN"
|
111
104
|
self.extend_nodes(a, closed_rules: closed_rules)
|
112
105
|
end
|
113
106
|
# add node as closed rule if not subsumed by another rule already added
|
114
107
|
rule_frequency = a.content.size
|
115
108
|
rule = a.name
|
116
109
|
if closed_rules[rule_frequency].nil?
|
117
|
-
|
110
|
+
# puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
|
118
111
|
closed_rules[rule_frequency] = [rule]
|
119
112
|
else
|
120
113
|
if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
|
121
|
-
|
114
|
+
# puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
|
122
115
|
closed_rules[rule_frequency] << rule
|
123
116
|
else
|
124
|
-
|
117
|
+
# puts "RULE SUBSUMED, NOT ADDING: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
|
125
118
|
end
|
126
119
|
end
|
127
120
|
current_node = current_node.next_sibling
|
data/lib/evoc/analyze.rb
CHANGED
@@ -28,6 +28,35 @@ module Evoc
|
|
28
28
|
CSV {|row| row << results}
|
29
29
|
end
|
30
30
|
|
31
|
+
def evolution
|
32
|
+
CSV {|row| row << %w(index relevant_index overlap)}
|
33
|
+
self.tx_store.each do |tx|
|
34
|
+
changed_in = self.tx_store.transactions_of_list(tx.items, strict: false, identifier: :index)
|
35
|
+
previous_txes = changed_in.select {|i| i <= tx.index}
|
36
|
+
previous_txes.each do |prev_index|
|
37
|
+
prev_tx = self.tx_store.get_tx(id: prev_index,id_type: :index)
|
38
|
+
overlap = ((prev_tx.items & tx.items).size/tx.size.to_f).round(2)
|
39
|
+
CSV {|row| row << [tx.index,prev_index,overlap]}
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def commits
|
45
|
+
unique_items = Set.new
|
46
|
+
changes_so_far = 0
|
47
|
+
self.tx_store.each do |tx|
|
48
|
+
data = Hash.new
|
49
|
+
tx.items.each {|item| unique_items << item}
|
50
|
+
changes_so_far = changes_so_far += tx.items.size
|
51
|
+
data['sha'] = tx.id
|
52
|
+
data['index'] = tx.index
|
53
|
+
data['num_changes'] = tx.items.size
|
54
|
+
data['items_touched_so_far'] = unique_items.size
|
55
|
+
data['moving_average'] = (changes_so_far/(tx.index+1)).to_f.round(2)
|
56
|
+
STDOUT.puts data.to_json
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
31
60
|
def avg_method_changes_per_parsable_file
|
32
61
|
parsable_files_changed = 0
|
33
62
|
method_changes = 0
|
data/lib/evoc/array.rb
CHANGED
@@ -1,37 +1,49 @@
|
|
1
1
|
class Array
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
2
|
+
|
3
|
+
def mean
|
4
|
+
self.inject(0) { |sum, x| sum += x } / self.size.to_f
|
5
|
+
end
|
6
|
+
|
7
|
+
def median(already_sorted=false)
|
8
|
+
return nil if self.empty?
|
9
|
+
array = (already_sorted ? self : self.sort)
|
10
|
+
m_pos = array.size / 2
|
11
|
+
return array.size % 2 == 1 ? array[m_pos] : array[m_pos-1..m_pos].mean
|
12
|
+
end
|
13
|
+
|
14
|
+
def subset?(other)
|
15
|
+
self & other == self
|
16
|
+
end
|
17
|
+
|
18
|
+
def include_any?(other)
|
19
|
+
(self & other).size > 0
|
20
|
+
end
|
21
|
+
|
22
|
+
##
|
23
|
+
# returns the union of an array of arrays
|
24
|
+
def array_union
|
25
|
+
if union = self.inject(:|)
|
26
|
+
return union
|
27
|
+
else
|
28
|
+
return []
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
##
|
33
|
+
# returns the intersection of a list of lists
|
34
|
+
def array_intersection
|
35
|
+
if intersection = self.inject(:&)
|
36
|
+
return intersection
|
37
|
+
else
|
38
|
+
return []
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
##
|
43
|
+
# returns the list of items in self that was not in other
|
44
|
+
def array_difference(other)
|
45
|
+
self.map {|a| a - other}.array_union
|
46
|
+
end
|
35
47
|
|
36
48
|
def self.powerset(set)
|
37
49
|
return [set] if set.empty?
|
data/lib/evoc/evaluate.rb
CHANGED
@@ -11,6 +11,14 @@ module Evoc
|
|
11
11
|
end
|
12
12
|
end
|
13
13
|
|
14
|
+
def self.mean_support(rules:)
|
15
|
+
if rules.empty? then return nil end
|
16
|
+
return (rules.inject(0) {|sum,r| sum + r.m_support.value}/rules.size).to_f
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.mean_support10(rules:)
|
20
|
+
return self.mean_support(rules: Evoc::RuleStore.sort_on(rules: rules,measures: ['m_support']).take(10).flatten.take(10))
|
21
|
+
end
|
14
22
|
|
15
23
|
def self.mean_confidence(rules:)
|
16
24
|
if rules.empty? then return nil end
|
data/lib/evoc/experiment.rb
CHANGED
@@ -17,6 +17,8 @@ module Evoc
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def sample_transactions
|
20
|
+
# initialize a random number generator with fixed seed
|
21
|
+
rand = Random.new(self.opts[:seed])
|
20
22
|
# by default we can sample from the whole history
|
21
23
|
sampling_history = Evoc::HistoryStore.base_history
|
22
24
|
STDERR.puts "Sampling transactions from a pool of #{sampling_history.size}.."
|
@@ -74,7 +76,7 @@ module Evoc
|
|
74
76
|
tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
|
75
77
|
tx_sizes_to_sample_from.each do |group_size|
|
76
78
|
if group_size == '*'
|
77
|
-
sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size])
|
79
|
+
sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size], random: rand)
|
78
80
|
sample << sampled_ids
|
79
81
|
STDERR.puts "Sampled #{sampled_ids.size} txes"
|
80
82
|
# remove sampled txes from sampling_history
|
@@ -87,7 +89,7 @@ module Evoc
|
|
87
89
|
if group.size < self.opts[:sample_size]
|
88
90
|
logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
|
89
91
|
end
|
90
|
-
sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
|
92
|
+
sampled_ids = group.sample(self.opts[:sample_size], random: rand).map(&:id)
|
91
93
|
sample << sampled_ids
|
92
94
|
STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
|
93
95
|
else
|
@@ -109,6 +111,8 @@ module Evoc
|
|
109
111
|
# tx_id, query
|
110
112
|
#
|
111
113
|
def generate_queries
|
114
|
+
# initialize a random number generator with fixed seed
|
115
|
+
rand = Random.new(self.opts[:seed])
|
112
116
|
##
|
113
117
|
# write dict
|
114
118
|
##
|
@@ -154,15 +158,15 @@ module Evoc
|
|
154
158
|
specified_sizes.uniq!
|
155
159
|
|
156
160
|
random_sizes = []
|
157
|
-
if self.opts[:random_select] then random_sizes << Random.new.rand(
|
161
|
+
if self.opts[:random_select] then random_sizes << Random.new.rand(self.opts[:minimum_query_size]..(tx_size-1)) end
|
158
162
|
|
159
163
|
sampled_queries = []
|
160
164
|
# only specified sizes
|
161
165
|
if random_sizes.empty? & !specified_sizes.empty?
|
162
|
-
sampled_queries = specified_sizes.map {|s| items.sample(s)}
|
166
|
+
sampled_queries = specified_sizes.map {|s| items.sample(s, random: rand)}
|
163
167
|
# only random sizes
|
164
168
|
elsif !random_sizes.empty? & specified_sizes.empty?
|
165
|
-
sampled_queries = random_sizes.map {|s| items.sample(s)}
|
169
|
+
sampled_queries = random_sizes.map {|s| items.sample(s, random: rand)}
|
166
170
|
# random + specified = randomly sample in range defined by specified
|
167
171
|
# ex:
|
168
172
|
# specified = [1,3,10,20]
|
@@ -172,9 +176,9 @@ module Evoc
|
|
172
176
|
# 2. randomly select X in specified = Y
|
173
177
|
# 3. randomly select Y in tx
|
174
178
|
elsif !random_sizes.empty? & !specified_sizes.empty?
|
175
|
-
specified_sizes.select! {|s| (s < tx_size) & (s
|
176
|
-
if randomly_sampled_size = specified_sizes.sample #2.
|
177
|
-
sampled_queries = [items.sample(randomly_sampled_size)] #3.
|
179
|
+
specified_sizes.select! {|s| (s < tx_size) & (s >= self.opts[:minimum_query_size])} #1.
|
180
|
+
if randomly_sampled_size = specified_sizes.sample(random: rand) #2.
|
181
|
+
sampled_queries = [items.sample(randomly_sampled_size, random: rand)] #3.
|
178
182
|
end
|
179
183
|
end
|
180
184
|
|
@@ -189,6 +193,9 @@ module Evoc
|
|
189
193
|
logger.debug "The size of the sampled query was equal to the size of the transaction, skipping.. Tx ID: #{tx_id}. Query size: #{query.size}"
|
190
194
|
next
|
191
195
|
end
|
196
|
+
if query.size < self.opts[:minimum_query_size]
|
197
|
+
next
|
198
|
+
end
|
192
199
|
CSV {|row| row << [tx_id,query.join(',')]}
|
193
200
|
end
|
194
201
|
else
|
@@ -231,8 +238,6 @@ module Evoc
|
|
231
238
|
factor_max_size = self.opts[:max_size].nil? ? nil : self.opts[:max_size].map {|s| [ 'max_size',s ]}
|
232
239
|
# Factor: Model age aka number of commits between query and last tx in history
|
233
240
|
factor_model_age = self.opts[:model_age].nil? ? nil : self.opts[:model_age].map {|s| [ 'model_age',s ]}
|
234
|
-
# Factor: Algorithm
|
235
|
-
factor_algorithms = self.opts[:algorithms].nil? ? nil : self.opts[:algorithms].map {|a| ['algorithm',a]}
|
236
241
|
# Factor: Measures
|
237
242
|
factor_measures = self.opts[:measures].map {|c| ['measures',c]}
|
238
243
|
# Factor: Aggregator
|
@@ -253,7 +258,8 @@ module Evoc
|
|
253
258
|
num_lines = File.read(self.opts[:queries]).each_line.count-1
|
254
259
|
current_line = 1
|
255
260
|
|
256
|
-
|
261
|
+
# compact removes nil values (not used factors)
|
262
|
+
factors = [factor_model_size,factor_max_size,factor_model_age,factor_measures,factor_permutation,factor_aggregators].compact
|
257
263
|
num_of_scenarios = factors.inject(1) {|product,f| product * f.size}
|
258
264
|
invalid_configuration = 0
|
259
265
|
last_error = 'no errors'
|
@@ -280,38 +286,53 @@ module Evoc
|
|
280
286
|
end
|
281
287
|
|
282
288
|
current_scenario = 1
|
283
|
-
# - compact removes nil values (not used factors)
|
284
289
|
# - the splat operator '*' turns the array into parameters for #product
|
285
290
|
# - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
|
286
291
|
factors.first.product(*factors[1..-1]).each do |scenario|
|
287
|
-
# Print progress to stderr
|
288
|
-
STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
|
289
|
-
if invalid_configuration > 0
|
290
|
-
STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
|
291
|
-
else
|
292
|
-
STDERR.print " \r"
|
293
|
-
end
|
294
|
-
|
295
292
|
params = query_hash.merge(scenario.to_h)
|
296
293
|
params[:case_id] = self.opts[:case_id]
|
297
294
|
params[:granularity] = self.opts[:granularity]
|
298
295
|
# initialize scenario
|
299
296
|
s = Evoc::Scenario.new(params)
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
297
|
+
scenario_stats = {}
|
298
|
+
if self.opts[:stats]
|
299
|
+
scenario_stats = s.stats
|
300
|
+
end
|
301
|
+
# Factor: Algorithm
|
302
|
+
self.opts[:algorithms].each do |algorithm|
|
303
|
+
s.algorithm = algorithm
|
304
|
+
# Print progress to stderr
|
305
|
+
STDERR.print "(#{self.opts[:case_id]}) Executing #{algorithm} on scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
|
306
|
+
if invalid_configuration > 0
|
307
|
+
STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
|
308
|
+
else
|
309
|
+
STDERR.print " \r"
|
310
|
+
end
|
311
|
+
|
312
|
+
begin
|
313
|
+
Evoc::RecommendationCache.get_recommendation(algorithm: algorithm,
|
314
|
+
query: s.query,
|
315
|
+
model_start: s.model_start,
|
316
|
+
model_end: s.model_end,
|
317
|
+
max_size: s.max_size,
|
318
|
+
aggregator: s.aggregator,
|
319
|
+
measures: s.measures)
|
320
|
+
Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators],
|
321
|
+
topk: self.opts[:topk],
|
322
|
+
unique_consequents: self.opts[:unique_consequents],
|
323
|
+
expected_outcome: s.expected_outcome,
|
324
|
+
measure_combination: s.measures)
|
325
|
+
|
326
|
+
# build json line by merging hashes
|
327
|
+
$stdout.puts s.to_h
|
328
|
+
.merge(scenario_stats)
|
329
|
+
.merge({topk: self.opts[:topk], date: tx.date})
|
330
|
+
.merge(Evoc::RecommendationCache.to_h(measures: s.measures))
|
331
|
+
.to_json
|
332
|
+
rescue ArgumentError => e
|
333
|
+
invalid_configuration += 1
|
334
|
+
last_error = e.message
|
335
|
+
end
|
315
336
|
end
|
316
337
|
current_scenario += 1
|
317
338
|
end
|
@@ -12,14 +12,22 @@ module Evoc
|
|
12
12
|
attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
|
13
13
|
end
|
14
14
|
|
15
|
-
def self.recommendation_cached?(algorithm:,
|
15
|
+
def self.recommendation_cached?(algorithm:,
|
16
|
+
query:,
|
17
|
+
model_start:,
|
18
|
+
model_end:,
|
19
|
+
max_size: nil)
|
16
20
|
return self.tag == [algorithm,query,model_start,model_end,max_size].hash
|
17
21
|
end
|
18
22
|
|
19
23
|
|
20
|
-
|
21
|
-
|
22
|
-
|
24
|
+
def self.get_recommendation(algorithm:,
|
25
|
+
query:,
|
26
|
+
model_start:,
|
27
|
+
model_end:,
|
28
|
+
max_size: nil,
|
29
|
+
aggregator: nil,
|
30
|
+
measures: [])
|
23
31
|
# check if a new base recommendation needs to be generated
|
24
32
|
tag = [algorithm,query,model_start,model_end,max_size].hash
|
25
33
|
if self.tag != tag
|
data/lib/evoc/scenario.rb
CHANGED
@@ -16,6 +16,7 @@ module Evoc
|
|
16
16
|
:model_size,
|
17
17
|
:model_age,
|
18
18
|
:max_size,
|
19
|
+
:stats,
|
19
20
|
:opts
|
20
21
|
|
21
22
|
def initialize(opts = Hash.new)
|
@@ -23,7 +24,7 @@ module Evoc
|
|
23
24
|
self.opts = opts
|
24
25
|
self.scenario_id = opts.hash
|
25
26
|
|
26
|
-
|
27
|
+
self.tx_id = opts[:tx_id]
|
27
28
|
self.model_age = opts[:model_age]
|
28
29
|
opts.each do |attribute,value|
|
29
30
|
self.send("#{attribute}=", value)
|
@@ -55,11 +56,17 @@ module Evoc
|
|
55
56
|
|
56
57
|
|
57
58
|
def to_h
|
58
|
-
fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
|
59
|
+
fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures stats)
|
59
60
|
hash = Hash.new
|
60
61
|
fields.each do |key|
|
61
62
|
value = self.method(key).call
|
62
|
-
|
63
|
+
if value.is_a?(Array)
|
64
|
+
hash[key] = value.join(',')
|
65
|
+
elsif value.is_a?(Hash)
|
66
|
+
hash.merge!(value)
|
67
|
+
else
|
68
|
+
hash[key] = value
|
69
|
+
end
|
63
70
|
end
|
64
71
|
return hash
|
65
72
|
end
|
@@ -228,5 +235,65 @@ module Evoc
|
|
228
235
|
def tx_size
|
229
236
|
self.tx.size
|
230
237
|
end
|
238
|
+
|
239
|
+
def stats
|
240
|
+
time_start = Time.now
|
241
|
+
history = Evoc::HistoryStore.get_history(self.model_start,self.model_end,self.max_size)
|
242
|
+
relevant_transactions = Set.new
|
243
|
+
relevant_items = Set.new
|
244
|
+
relevant_ages = []
|
245
|
+
avg_age_of_relevant_transactions = 0
|
246
|
+
avg_size_of_relevant_transactions = 0
|
247
|
+
files_changed = Set.new
|
248
|
+
num_methods_changed = 0
|
249
|
+
num_new_items = 0
|
250
|
+
# @avg_size_of_relevant_transactions = 0
|
251
|
+
# mean_age_of_relevant
|
252
|
+
# median_age_of_relevant
|
253
|
+
# ratio_new_items
|
254
|
+
self.query.each do |item|
|
255
|
+
if change = history.int_2_name[item]
|
256
|
+
change = change.split(':')
|
257
|
+
files_changed << change[0]
|
258
|
+
if change.size > 1
|
259
|
+
num_methods_changed = num_methods_changed + 1
|
260
|
+
end
|
261
|
+
indexes_of_previous_changes = history.transactions_of(item, identifier: :index)
|
262
|
+
if new_item = (indexes_of_previous_changes.size == 0)
|
263
|
+
num_new_items = num_new_items + 1
|
264
|
+
else
|
265
|
+
indexes_of_previous_changes.each do |tx_index|
|
266
|
+
relevant_so_far = relevant_transactions.size
|
267
|
+
relevant_transactions << tx_index
|
268
|
+
new_relevant = (relevant_transactions.size > relevant_so_far)
|
269
|
+
if new_relevant
|
270
|
+
age = (self.tx_index - tx_index)
|
271
|
+
relevant_ages << age
|
272
|
+
tx = history.get_tx(id: tx_index,id_type: :index)
|
273
|
+
avg_size_of_relevant_transactions = avg_size_of_relevant_transactions + tx.size
|
274
|
+
avg_age_of_relevant_transactions = avg_age_of_relevant_transactions + age
|
275
|
+
end
|
276
|
+
end
|
277
|
+
end
|
278
|
+
else
|
279
|
+
num_new_items = num_new_items + 1
|
280
|
+
end
|
281
|
+
end
|
282
|
+
|
283
|
+
num_relevant_transactions = relevant_transactions.size
|
284
|
+
|
285
|
+
time_end = Time.now
|
286
|
+
time_generate_stats = TimeDifference.between(time_start,time_end).in_seconds.round(8)
|
287
|
+
{time_generate_stats: time_generate_stats,
|
288
|
+
num_files_changed: files_changed.size,
|
289
|
+
num_methods_changed: num_methods_changed,
|
290
|
+
num_new_items: num_new_items,
|
291
|
+
num_relevant_transactions: num_relevant_transactions,
|
292
|
+
median_age_of_relevant_transactions: relevant_ages.median,
|
293
|
+
avg_age_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_age_of_relevant_transactions/num_relevant_transactions).to_f,
|
294
|
+
avg_size_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_size_of_relevant_transactions/num_relevant_transactions).to_f}
|
295
|
+
end
|
296
|
+
|
297
|
+
|
231
298
|
end
|
232
299
|
end
|
data/lib/evoc/version.rb
CHANGED
data/lib/evoc_cli/analyze.rb
CHANGED
@@ -6,6 +6,17 @@ module EvocCLI
|
|
6
6
|
class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
|
7
7
|
class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
|
8
8
|
|
9
|
+
desc "evolution","Outputs where the items of all transactions previously changed."
|
10
|
+
def evolution
|
11
|
+
a = Evoc::Analyze.new(options)
|
12
|
+
a.evolution
|
13
|
+
end
|
14
|
+
|
15
|
+
desc "commits","Outputs the items touched so far and the moving average of items per transactions for every transaction in the history"
|
16
|
+
def commits
|
17
|
+
a = Evoc::Analyze.new(options)
|
18
|
+
a.commits
|
19
|
+
end
|
9
20
|
|
10
21
|
method_option :number, aliases: '-n', type: :numeric, default: 1000, desc: "The number of rules to calculate measures for"
|
11
22
|
desc "measure_values","Empirically investigate the range of interestingness measures"
|
data/lib/evoc_cli/experiment.rb
CHANGED
@@ -19,6 +19,7 @@ module EvocCLI
|
|
19
19
|
|
20
20
|
##
|
21
21
|
# sample_transactions
|
22
|
+
method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
|
22
23
|
method_option :sample_groups, aliases: "-g", type: :array, required: true, desc: "What tx size groups to sample from"
|
23
24
|
method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
|
24
25
|
method_option :recent, type: :numeric, desc: "Filter to the X most recent transactions"
|
@@ -38,6 +39,7 @@ module EvocCLI
|
|
38
39
|
##
|
39
40
|
# generate_queries
|
40
41
|
#
|
42
|
+
method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
|
41
43
|
method_option :transaction_ids_path, :aliases => '-i', :type => :string, :required => true, :desc => "Path to file with \\n separated list of transaction ids"
|
42
44
|
method_option :random_select, type: :boolean, default: false, desc: "Randomly select a query from each given transaction"
|
43
45
|
method_option :select, aliases: '-s', type: :array, default: [],
|
@@ -46,6 +48,7 @@ module EvocCLI
|
|
46
48
|
desc: "Reverse version of --select (select \"all but\" X)"
|
47
49
|
method_option :percentage, aliases: '-e', type: :array,
|
48
50
|
desc: "Percentage of items to select for each query"
|
51
|
+
method_option :minimum_query_size, type: :numeric, default: 1, desc: "Only sample queries of at least this size"
|
49
52
|
method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
|
50
53
|
method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
|
51
54
|
method_option :write_dict, type: :string, desc: "Write an item dictionary to the provided file"
|
@@ -75,6 +78,7 @@ module EvocCLI
|
|
75
78
|
method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
|
76
79
|
method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
|
77
80
|
method_option :topk, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER any consequent filter"
|
81
|
+
method_option :stats, type: :boolean, required: false, desc: "Generate extra stats describing each scenario"
|
78
82
|
desc "execute_scenarios [options]",""
|
79
83
|
def execute_scenarios
|
80
84
|
if !options[:permutation].nil?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: evoc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.9.1
|
4
|
+
version: 3.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Rolfsnes
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-03-
|
11
|
+
date: 2017-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|