evoc 3.6.2 → 3.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/evoc.gemspec +1 -0
- data/lib/evoc/algorithm.rb +72 -5
- data/lib/evoc/algorithms/closed_rules.rb +145 -0
- data/lib/evoc/evaluate.rb +233 -58
- data/lib/evoc/exceptions/configuration_error.rb +6 -0
- data/lib/evoc/exceptions/format_error.rb +6 -0
- data/lib/evoc/experiment.rb +51 -8
- data/lib/evoc/recommendation_cache.rb +14 -17
- data/lib/evoc/rule.rb +8 -4
- data/lib/evoc/rule_store.rb +59 -26
- data/lib/evoc/scenario.rb +0 -34
- data/lib/evoc/tree/tree_node.rb +26 -0
- data/lib/evoc/tx_store.rb +8 -0
- data/lib/evoc/util.rb +37 -0
- data/lib/evoc/version.rb +1 -1
- data/lib/evoc_cli/experiment.rb +57 -1
- data/lib/evoc_helper.rb +3 -0
- metadata +21 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b867c7a5e05b3c2be58b9dd361554a43f3f277c
|
4
|
+
data.tar.gz: 9d00091a6fd7685f048930889aaf252df5aa6634
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f89dbef20f735e0f8c6b8f7104ea8b38a4dd118707089b17acdf529591fd652478406047cb5d4b08e18afb746d3edf46dc7cf9ac7eb208d214f91e9050841c12
|
7
|
+
data.tar.gz: 7e1955af5653df5d7afd986e9d522178b32d10c26771833e4d37aec910dd93208de56c8a6f83bd21faeb8796b098da808f72a05c9f6e3caf1100064fd20d7bb0
|
data/.gitignore
CHANGED
data/evoc.gemspec
CHANGED
data/lib/evoc/algorithm.rb
CHANGED
@@ -20,8 +20,8 @@ module Evoc
|
|
20
20
|
Evoc::Algorithm.cached_rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
|
21
21
|
elsif match = /rule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
|
22
22
|
Evoc::Algorithm.rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
|
23
|
-
elsif Evoc::Algorithm.respond_to?(algorithm
|
24
|
-
Evoc::Algorithm.method(algorithm
|
23
|
+
elsif Evoc::Algorithm.respond_to?(algorithm)
|
24
|
+
Evoc::Algorithm.method(algorithm).call(tx_store:tx_store,query:query)
|
25
25
|
else raise ArgumentError.new, "#{algorithm} is not an available algorithm"
|
26
26
|
end
|
27
27
|
end
|
@@ -105,6 +105,70 @@ module Evoc
|
|
105
105
|
end
|
106
106
|
|
107
107
|
|
108
|
+
##
# Index, in a trie, every (consequent, antecedent) pair observed in the
# transactions that touch the query.
#
# For each transaction returned by tx_store.transactions_of_list(query) the
# antecedent is the part of the query that changed in that transaction and
# the consequents are the remaining items of the transaction. Each pair is
# stored once under the key "<consequent><antecedent items concatenated>".
#
# NOTE(review): the key is built without separators, so distinct pairs can
# collapse onto the same key (e.g. consequent 1 + antecedent [23] and
# consequent 12 + antecedent [3]); confirm whether that is intended.
#
# Rule generation from the trie is not implemented yet; the trie itself is
# returned.
#
# @param tx_store the transaction store to mine
# @param query [Array] the items of the query
# @return [Containers::Trie] the populated trie
def self.not_subsumed(tx_store:, query:)
  trie = Containers::Trie.new
  # initial filter, we consider all txes where something in the query changed
  tx_store.transactions_of_list(query).each do |tx_id|
    tx = tx_store.get_tx(id: tx_id, id_type: :index)
    antecedent = query & tx.items
    (tx.items - antecedent).each do |consequent|
      entry = "#{consequent.to_s}#{antecedent.join('')}"
      # nothing is printed here: stdout is reserved for result output
      trie.push(entry, consequent.to_s) if trie.get(entry).nil?
    end
  end
  trie
end
|
137
|
+
|
138
|
+
##
# Find the largest rules for each unique consequent.
#
# Every transaction that touches the query contributes the antecedent
# (query & tx.items) for each of its remaining items. Per consequent only the
# antecedents of maximal size are kept; ties are all kept.
#
# @param tx_store the transaction store to mine
# @param query [Array] the items of the query
# @return [Evoc::RuleStore] one rule per (largest antecedent, consequent) pair
def self.largest_rules(tx_store:,query:)
  largest = {}
  tx_store.transactions_of_list(query).each do |tx_id|
    tx = tx_store.get_tx(id:tx_id,id_type: :index)
    antecedent = query & tx.items
    (tx.items - antecedent).each do |consequent|
      known = largest[consequent]
      if known.nil? || antecedent.size > known.first.size
        # first sighting of this consequent, or a strictly larger antecedent
        largest[consequent] = Set.new([antecedent])
      elsif antecedent.size == known.first.size
        # equally large antecedent, keep it alongside the others
        known << antecedent
      end
    end
  end

  # now generate rules
  rule_store = Evoc::RuleStore.new(query: query)
  largest.each do |consequent, antecedents|
    antecedents.each do |antecedent|
      rule_store << Evoc::Rule.new(lhs: antecedent,rhs: consequent,tx_store:tx_store)
    end
  end
  rule_store
end
|
170
|
+
|
171
|
+
|
108
172
|
##
|
109
173
|
# TARMAQ
|
110
174
|
# find largest subsets in @query with evidence in @tx_store version
|
@@ -113,7 +177,6 @@ module Evoc
|
|
113
177
|
#initial filter, we consider all txes where something in the query changed
|
114
178
|
query_changed_in = tx_store.transactions_of_list(query)
|
115
179
|
# now find what subsets of the query changed in each tx
|
116
|
-
rules = Hash.new
|
117
180
|
query_changed_in.each do |tx_id|
|
118
181
|
tx = tx_store.get_tx(id:tx_id,id_type: :index)
|
119
182
|
largest_match_in_query = (query & tx.items)
|
@@ -134,14 +197,18 @@ module Evoc
|
|
134
197
|
###
|
135
198
|
## rose
|
136
199
|
###
|
137
|
-
def self.
|
200
|
+
# Delegate to cached_rule_range with both bounds set to the size of the
# query.
def self.rose(tx_store:,query:)
  query_size = query.size
  cached_rule_range(query_size, query_size, tx_store: tx_store, query: query)
end
|
141
204
|
|
142
|
-
def self.
|
205
|
+
# Delegate to cached_rule_range with both bounds set to 1.
def self.co_change(tx_store:, query:)
  single_item = 1
  cached_rule_range(single_item, single_item, tx_store: tx_store, query: query)
end
|
145
208
|
|
209
|
+
# Delegate to Evoc::ClosedRules.closed_rules with the same arguments.
def self.closed_rules(tx_store:, query:)
  miner = Evoc::ClosedRules
  miner.closed_rules(tx_store: tx_store, query: query)
end
|
212
|
+
|
146
213
|
end # Algorithm
|
147
214
|
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
module Evoc
  # Generates rules from a tree of item sets: one subtree per consequent,
  # whose nodes are named by item sets (arrays of item ids as strings) and
  # carry the transactions those items share as content.
  class ClosedRules
    ##
    # Build the tree for the query, extend it, and turn every recorded item
    # set into a rule "item set minus consequent -> consequent".
    #
    # @param tx_store the transaction store to mine
    # @param query [Array] the items of the query
    # @return [Evoc::RuleStore] the generated rules, each with m_support set
    #   to (number of shared transactions) / tx_store.size
    def self.closed_rules(tx_store:,query:)
      # create initial trees, one tree per consequent
      tree = self.initialize_tree(tx_store,query)
      closed_rules = Evoc::RuleStore.new(query: query)
      tree.children.each do |consequent|
        self.extend_nodes(consequent).each do |frequency, closed_sets|
          closed_sets.each do |closed_set|
            antecedent = closed_set - consequent.name
            closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
          end
        end
      end
      closed_rules
    end

    # The helpers below are internal. A bare `private` has no effect on
    # methods defined with `def self.`, so they were always callable from
    # outside and remain so.

    ##
    # Build the initial tree: the root has one child per consequent (content:
    # transactions of that consequent), and each consequent has one child per
    # query item it changed with (content: the transactions they share).
    #
    # @return [Tree::TreeNode] the root of the tree
    def self.initialize_tree(tx_store, query)
      tree = Tree::TreeNode.new([])
      # find all items that changed with something in the query
      tx_store.transactions_of_list(query).each do |tx_id|
        tx = tx_store.get_tx(id:tx_id,id_type: :index)
        antecedent = (query & tx.items)
        # store all items from the query that have changed with each consequent
        (tx.items - antecedent).each do |consequent|
          consequent_key = [consequent.to_s]
          if tree[consequent_key].nil?
            # initialize candidates
            tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
          end
          txes_consequent = tree[consequent_key].content
          antecedent.each do |item|
            union = [item.to_s,consequent.to_s]
            if tree[consequent_key][union].nil?
              txes_union = tx_store.transactions_of(item) & txes_consequent
              tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
            end
          end
        end
      end
      tree
    end

    ##
    # Combine every child of +root+ with its later siblings, descend into the
    # children this creates, and record each child's item set under the
    # number of transactions it occurs in.
    #
    # The tree is modified while it is walked (siblings are removed, nodes
    # renamed, children added), so the statement order matters.
    #
    # @param root [Tree::TreeNode] the node whose children are processed
    # @param closed_rules [Hash] accumulator shared with recursive calls,
    #   frequency => list of item sets
    # @return [Hash] the accumulator
    def self.extend_nodes(root,closed_rules: {})
      current_node = root.first_child
      until current_node.nil?
        a = current_node
        b = a.next_sibling
        until b.nil?
          ab = a.name | b.name
          a_txes = a.content
          b_txes = b.content
          ab_txes = a_txes & b_txes
          # item sets that never occur together are not combined
          if ab_txes.size > 0
            case self.compare(a_txes,b_txes)
            when 'EQUAL'
              # same transactions: drop B and fold its items into A's subtree
              temp = b.previous_sibling
              root.remove!(b)
              b = temp # step back so next_sibling below continues after the removed node
              a.each {|n| n.rename(ab | n.name)}
            when 'A_IN_B'
              # A's transactions are all in B's: fold B's items into A's subtree
              a.each {|n| n.rename(ab | n.name)}
            when 'B_IN_A'
              # B's transactions are all in A's: drop B, keep the union as a child of A
              temp = b.previous_sibling
              root.remove!(b)
              b = temp
              a << Tree::TreeNode.new(ab,ab_txes)
            when 'NOT_EQUAL'
              # neither contains the other: keep the union as a child of A
              a << Tree::TreeNode.new(ab,ab_txes)
            end
          end
          b = b.next_sibling
        end
        # traverse down into the children created above
        self.extend_nodes(a, closed_rules: closed_rules) unless a.children.empty?
        # add node as closed rule if not subsumed by another rule already added
        rule_frequency = a.content.size
        rule = a.name
        known = closed_rules[rule_frequency]
        if known.nil?
          closed_rules[rule_frequency] = [rule]
        elsif known.none? {|closed| (rule - closed).empty? }
          known << rule
        end
        current_node = current_node.next_sibling
      end
      closed_rules
    end

    ##
    # Classify how two transaction lists relate.
    #
    # @return [String] 'EQUAL' when a == b, 'A_IN_B' when a has no element
    #   outside b, 'B_IN_A' when b has no element outside a, otherwise
    #   'NOT_EQUAL'
    def self.compare(a,b)
      if a == b
        'EQUAL'
      elsif (a - b).empty?
        'A_IN_B'
      elsif (b - a).empty?
        'B_IN_A'
      else
        'NOT_EQUAL'
      end
    end
  end
end
|
145
|
+
|
data/lib/evoc/evaluate.rb
CHANGED
@@ -1,42 +1,251 @@
|
|
1
1
|
module Evoc
|
2
|
-
|
2
|
+
module Evaluate
|
3
3
|
extend Logging
|
4
4
|
|
5
|
+
##
# Verify that +input+ is a clustered relevance list: a non-empty Array of
# non-empty Arrays whose items are all 0 or 1. Every cluster and every item
# is checked, not only the first.
#
# @param input the value to check
# @return [nil] when the input is well formed
# @raise [Evoc::Exceptions::FormatError] when it is not
def self.validateInput(input)
  well_formed = input.is_a?(Array) && !input.empty? && input.all? do |cluster|
    cluster.is_a?(Array) && !cluster.empty? && cluster.all? { |item| [0,1].include?(item) }
  end
  return nil if well_formed
  raise Evoc::Exceptions::FormatError.new "Wrong format given to #{__method__}, expected list of list of 0s and 1s, input was: #{input}"
end
|
5
13
|
|
6
|
-
def self.
|
7
|
-
if
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
14
|
+
##
# Mean of the m_confidence values of the given rules.
#
# The division is done with fdiv, so Integer values are not truncated.
#
# @param rules a collection of rules responding to m_confidence.value
# @return [Float, nil] the mean, or nil when there are no rules
def self.mean_confidence(rules:)
  return nil if rules.empty?
  total = rules.inject(0) {|sum,r| sum + r.m_confidence.value}
  total.fdiv(rules.size)
end
|
18
|
+
|
19
|
+
##
# Ratio of clusters to items in a clustered recommendation: 1.0 when every
# item is in its own cluster, lower when items share clusters.
#
# @param rec [Array<Array<Integer>>] clustered relevance list of 0s and 1s
# @return [Float, nil] clusters / items, or nil for the empty list
def self.discernibility(rec:)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?
  self.validateInput(rec)

  cluster_count = rec.size
  item_count = rec.inject(0) {|sum,cluster| sum + cluster.size}
  # fdiv: a plain Integer division would truncate to 0 whenever a cluster
  # holds more than one item
  cluster_count.fdiv(item_count)
end
|
18
38
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
39
|
+
##
# Whether a recommendation was produced at all.
#
# @param rec [Array] the recommendation
# @return [Integer] 0 when +rec+ is empty, 1 otherwise
# @raise [Evoc::Exceptions::FormatError] when +rec+ is not an Array
def self.applicable(rec:)
  unless rec.is_a?(Array)
    # the message reports the offending argument, which is +rec+
    raise Evoc::Exceptions::FormatError.new "Wrong format given to #{__method__}, expected an array, input was: #{rec}"
  end
  rec.empty? ? 0 : 1
end
|
46
|
+
|
47
|
+
##
# @param rec [Array<Array<Integer>>] clustered relevance list of 0s and 1s
# @param exp [Numeric] the number of expected (relevant) items
# @return [Float, nil] the f1 score (precision/recall harmonic mean),
#   computed as 2 * correct / (recommended + expected); nil for the empty list
def self.f1(rec:,exp:)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?
  self.validateInput(rec)

  items = rec.flatten(1)
  rec_correct = items.inject(0, :+)
  # fdiv: a plain Integer division would truncate the score
  (2*rec_correct).fdiv(items.size + exp)
end
|
67
|
+
|
68
|
+
|
69
|
+
##
# @param rec [Array<Array<Integer>>] clustered relevance list of 0s and 1s
# @return [Integer, nil] the 1-based rank of the first relevant item, counting
#   items across clusters in order; nil when the list is empty or holds no
#   relevant item
def self.first_relevant(rec:)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?
  self.validateInput(rec)

  position = rec.flatten(1).index(1)
  position.nil? ? nil : position + 1
end
|
33
89
|
|
90
|
+
##
# @param rec [Array<Array<Integer>>] clustered relevance list of 0s and 1s
# @return [Integer, nil] the 1-based rank of the last relevant item, counting
#   items across clusters in order; nil when the list is empty or holds no
#   relevant item
def self.last_relevant(rec:)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?
  self.validateInput(rec)

  position = rec.flatten(1).rindex(1)
  position.nil? ? nil : position + 1
end
|
111
|
+
|
112
|
+
##
# Recall restricted to the top of the list: the first ten clusters are
# flattened, the first ten items kept, and the result is handed to recall as
# a single cluster.
#
# @param rec [Array<Array<Integer>>] clustered relevance list of 0s and 1s
# @param exp [Numeric, nil] passed through to recall
# @return whatever recall returns for the truncated list; nil for the empty list
def self.recall10(rec:,exp: nil)
  if rec.is_a?(Array) && rec.empty?
    nil
  else
    self.validateInput(rec)
    top_items = rec.take(10).flatten.take(10)
    self.recall(rec: [top_items],exp: exp)
  end
end
|
120
|
+
|
121
|
+
##
# Precision restricted to the top of the list: the first ten clusters are
# flattened, the first ten items kept, and the result is handed to precision
# as a single cluster.
#
# @param rec [Array<Array<Integer>>] clustered relevance list of 0s and 1s
# @param exp accepted but not used by this method
# @return whatever precision returns for the truncated list; nil for the empty list
def self.precision10(rec:,exp: nil)
  if rec.is_a?(Array) && rec.empty?
    nil
  else
    self.validateInput(rec)
    top_items = rec.take(10).flatten.take(10)
    self.precision(rec: [top_items])
  end
end
|
129
|
+
|
130
|
+
##
# Fraction of recommended items that are relevant.
#
# @param rec [Array<Array<Integer>>] clustered relevance list of 0s and 1s
# @param exp accepted but not used by this method
# @return [Float, nil] relevant items / all items, or nil for the empty list
def self.precision(rec:,exp: nil)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?
  self.validateInput(rec)

  items = rec.flatten(1)
  num_correct_in_rec = items.inject(0, :+)
  # fdiv: a plain Integer division would yield 0 unless every item is relevant
  num_correct_in_rec.fdiv(items.size)
end
|
142
|
+
|
143
|
+
##
# Fraction of the expected items that appear in the recommendation.
#
# @param rec [Array<Array<Integer>>] clustered relevance list of 0s and 1s
# @param exp [Numeric, nil] the number of expected (relevant) items
# @return [Float] relevant items in rec / exp when +exp+ is given
# @return [Integer] the raw number of relevant items when +exp+ is nil
# @return [nil] for the empty list
# @raise [ArgumentError] when rec holds more relevant items than +exp+
def self.recall(rec:,exp: nil)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?
  self.validateInput(rec)

  num_correct_in_rec = rec.flatten(1).inject(0, :+)
  return num_correct_in_rec if exp.nil?

  if num_correct_in_rec > exp
    raise ArgumentError, "Found more relevant items than the provided number of relevant items"
  end
  # Rational division keeps the fraction (Integer division would truncate it)
  # and still raises ZeroDivisionError for an Integer exp of 0
  (num_correct_in_rec.to_r/exp).to_f
end
|
161
|
+
# clustered recommendation is expected to be a sorted list V
|
162
|
+
# where V = [V1,V2,..Vn]
|
163
|
+
# and Vi is a cluster of items with the same weight like [rel_1,rel_2,..,rel_n]
|
164
|
+
# where rel_i is 1 if the item is relevant and 0 if not
|
165
|
+
|
166
|
+
# r_p : relevant items in previous groups
|
167
|
+
# i_p : index previous group
|
168
|
+
# r_g : relevant items in group
|
169
|
+
# n_g : items in group
|
170
|
+
# i : index of current item
|
171
|
+
##
# Average precision over a clustered recommendation, where each item's
# contribution is weighted by the share of relevant items in its cluster.
#
# All arithmetic is done on Rationals so that no intermediate division is
# truncated, including for the items of the first cluster.
#
# @param rec [Array<Array<Integer>>] clustered relevance list of 0s and 1s
# @param exp [Numeric, nil] the number of relevant items; when nil the
#   recommendation is assumed to contain all relevant items
# @return [Float] the average precision
# @return [Integer] 0 when rec holds no relevant item
# @return [nil] for the empty list
# @raise [ArgumentError] when rec holds more relevant items than +exp+
def self.t_ap(rec:,exp: nil)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?
  self.validateInput(rec)

  ap = 0.to_r
  r_p = 0.to_r # relevant items in previous groups
  i_p = 0.to_r # index of the last item of the previous group
  rec.each do |cluster|
    r_g = cluster.inject(&:+).to_r # relevant items in group
    n_g = cluster.size.to_r        # items in group
    chance_relevant = r_g/n_g
    cluster.each_index do |offset|
      i = i_p + offset + 1 # 1-based index of the current item
      # share of the earlier items of this group counted as relevant
      relevant_earlier_in_group = (n_g == 1) ? 0 : offset*((r_g-1)/(n_g-1))
      avg_previous_rel = (r_p + relevant_earlier_in_group + 1)/i
      ap = ap + chance_relevant * avg_previous_rel
    end
    r_p = r_p + r_g
    i_p = i_p + n_g
  end
  # if the number of relevant documents is not supplied
  # assume that the recommendation contains all relevant documents
  if exp.nil?
    exp = r_p
  elsif r_p > exp
    raise ArgumentError, "Found more relevant items than the provided number of relevant items"
  end
  r_p == 0 ? 0 : (ap/exp).to_f
end
|
211
|
+
|
212
|
+
##
# Average precision of the recommendation read as a flat list (clusters are
# walked in order; ties inside a cluster are not given special treatment).
#
# @param rec [Array<Array<Integer>>] clustered relevance list of 0s and 1s
# @param exp [Numeric, nil] the number of relevant items; when nil the number
#   of relevant items found in rec is used
# @return [Float] the average precision
# @return [Integer] 0 when there are no relevant items
# @return [nil] for the empty list
# @raise [ArgumentError] when rec holds more relevant items than +exp+
def self.ap(rec:,exp: nil)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?
  self.validateInput(rec)

  seen = 0
  correct = 0
  precision_sum = 0.to_r
  rec.each do |cluster|
    cluster.each do |item|
      seen = seen + 1
      correct = correct + item
      # precision at this rank, as a Rational so that it is not truncated;
      # it only counts when the item itself is relevant
      precision_sum = precision_sum + Rational(correct, seen)*item
    end
  end

  if exp.nil?
    exp = correct
  elsif correct > exp
    raise ArgumentError, "Found more relevant items than the provided number of relevant items"
  end
  exp == 0 ? 0 : (precision_sum/exp).to_f
end
|
34
242
|
##
|
35
243
|
# calculate the average precision of the result based on an expected outcome
|
36
244
|
# @param [Array] recommendation a sorted array
|
37
245
|
# @param [Array] expected_outcome an array of items
|
38
246
|
# @return [Float] the average precision
|
39
|
-
def self.average_precision(recommendation,expected_outcome
|
247
|
+
def self.average_precision(recommendation,expected_outcome)
|
248
|
+
raise Error.new "#average_precision has been deprecated, use #ap instead"
|
40
249
|
if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
|
41
250
|
if (expected_outcome.size > 0) & !recommendation.empty?
|
42
251
|
average_precision = 0
|
@@ -45,7 +254,7 @@ module Evoc
|
|
45
254
|
# sort rules by weight
|
46
255
|
# we first group rules with equal weights
|
47
256
|
# and then sort the groups by weight
|
48
|
-
recommendation.
|
257
|
+
recommendation.each do |items|
|
49
258
|
if !items.is_a?(Array) then items = [items] end
|
50
259
|
if items.first.class != expected_outcome.first.class
|
51
260
|
raise ArgumentError, "Expected outcome was of type #{expected_outcome.first.class}, while the item in the recommendation was of type #{items.first.class}"
|
@@ -71,39 +280,5 @@ module Evoc
|
|
71
280
|
nil
|
72
281
|
end
|
73
282
|
end
|
74
|
-
|
75
|
-
# calculate the grouped average precision of the result based on an expected outcome
|
76
|
-
def self.e_collected_average_precision(expected_outcome)
|
77
|
-
if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
|
78
|
-
if (expected_outcome.size > 0) & !self.empty?
|
79
|
-
collected_average_precision = 0
|
80
|
-
correct_items = []
|
81
|
-
total_items_considered = []
|
82
|
-
# sort rules by weight
|
83
|
-
# we first group rules with equal weights
|
84
|
-
# and then sort the groups by weight
|
85
|
-
groups = self.group_by {|r| r.weight}.sort.reverse
|
86
|
-
groups.each do |(_,rules)|
|
87
|
-
items = rules.map(&:rhs).flatten.uniq
|
88
|
-
if (new_items = items - total_items_considered).size > 0
|
89
|
-
new_items.each {|item| total_items_considered << item}
|
90
|
-
if correct_in_group = (items & expected_outcome)
|
91
|
-
if correct_in_group.size > 0
|
92
|
-
# make sure that the new items havent already been added earlier
|
93
|
-
new_correct = (correct_in_group - correct_items)
|
94
|
-
# add new items
|
95
|
-
new_correct.each {|item| correct_items << item}
|
96
|
-
change_in_recall = new_correct.size.to_r/expected_outcome.size
|
97
|
-
precision_at_k = correct_items.size.to_r/total_items_considered.size
|
98
|
-
collected_average_precision += (precision_at_k * change_in_recall)
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
self.collected_average_precision = collected_average_precision.to_f
|
104
|
-
else
|
105
|
-
self.collected_average_precision = nil
|
106
|
-
end
|
107
|
-
end
|
108
283
|
end
|
109
284
|
end
|
data/lib/evoc/experiment.rb
CHANGED
@@ -78,6 +78,23 @@ module Evoc
|
|
78
78
|
# tx_id, query
|
79
79
|
#
|
80
80
|
def generate_queries
|
81
|
+
##
|
82
|
+
# write dict
|
83
|
+
##
|
84
|
+
if path = self.opts[:write_dict]
|
85
|
+
tmp = Tempfile.new('dict')
|
86
|
+
begin
|
87
|
+
tmp.puts("id,name")
|
88
|
+
Evoc::HistoryStore.base_history.int_2_name.each do |id,name|
|
89
|
+
tmp.puts("#{id},#{name}")
|
90
|
+
end
|
91
|
+
tmp.close
|
92
|
+
FileUtils.mv(tmp.path,path)
|
93
|
+
ensure
|
94
|
+
tmp.close
|
95
|
+
tmp.unlink
|
96
|
+
end
|
97
|
+
end
|
81
98
|
##
|
82
99
|
# WRITE CSV HEADER
|
83
100
|
CSV {|row| row << %W(tx_id query)}
|
@@ -124,7 +141,7 @@ module Evoc
|
|
124
141
|
# 2. randomly select X in specified = Y
|
125
142
|
# 3. randomly select Y in tx
|
126
143
|
elsif !random_sizes.empty? & !specified_sizes.empty?
|
127
|
-
specified_sizes.select! {|s| (s < tx_size) & (s >
|
144
|
+
specified_sizes.select! {|s| (s < tx_size) & (s > 0)} #1.
|
128
145
|
if randomly_sampled_size = specified_sizes.sample #2.
|
129
146
|
sampled_queries = [items.sample(randomly_sampled_size)] #3.
|
130
147
|
end
|
@@ -216,27 +233,53 @@ module Evoc
|
|
216
233
|
break
|
217
234
|
end
|
218
235
|
end
|
236
|
+
# get query
|
237
|
+
query_hash = query.to_h
|
238
|
+
# convert query string to array of items
|
239
|
+
query_hash['query'] = query_hash['query'].split(',').map(&:to_i)
|
240
|
+
# verify query before executing
|
241
|
+
if tx = Evoc::HistoryStore.base_history.get_tx(id: query_hash['tx_id'],id_type: :id)
|
242
|
+
if !(query_hash['query'] - tx.items).empty?
|
243
|
+
raise Evoc::Exceptions::ConfigurationError.new "The query generated from #{query_hash['tx_id']} was not a subset of the same tx in the loaded history. The query was: '#{query_hash['query']}', the tx was '#{tx.items}'"
|
244
|
+
end
|
245
|
+
else
|
246
|
+
raise Evoc::Exceptions::ConfigurationError.new "Could not find the tx: '#{query_hash['tx_id']}' from #{self.opts[:queries]} in the history #{self.opts[:transactions]}"
|
247
|
+
end
|
248
|
+
|
219
249
|
current_scenario = 1
|
250
|
+
last_error = 'no errors'
|
220
251
|
# - compact removes nil values (not used factors)
|
221
252
|
# - the splat operator '*' turns the array into parameters for #product
|
222
253
|
# - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
|
223
254
|
factors.first.product(*factors[1..-1]).each do |scenario|
|
224
255
|
# Print progress to stderr
|
225
|
-
STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}
|
256
|
+
STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
|
257
|
+
if invalid_configuration > 0
|
258
|
+
STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
|
259
|
+
else
|
260
|
+
STDERR.print " \r"
|
261
|
+
end
|
226
262
|
|
227
|
-
query_hash = query.to_h
|
228
|
-
# convert query to array
|
229
|
-
query_hash['query'] = query_hash['query'].split(',')
|
230
263
|
params = query_hash.merge(scenario.to_h)
|
231
264
|
params[:case_id] = self.opts[:case_id]
|
232
265
|
params[:granularity] = self.opts[:granularity]
|
233
266
|
# initialize scenario
|
234
|
-
|
267
|
+
s = Evoc::Scenario.new(params)
|
235
268
|
begin
|
236
|
-
|
237
|
-
|
269
|
+
Evoc::RecommendationCache.get_recommendation(algorithm: s.algorithm,
|
270
|
+
query: s.query,
|
271
|
+
model_start: s.model_start,
|
272
|
+
model_end: s.model_end,
|
273
|
+
max_size: s.max_size,
|
274
|
+
aggregator: s.aggregator,
|
275
|
+
measures: s.measures)
|
276
|
+
Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], top_k: self.opts[:top_k], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
|
277
|
+
result = Evoc::RecommendationCache.to_h(measures: s.measures)
|
278
|
+
# merge scenario params with result hash and dump as json
|
279
|
+
$stdout.puts s.to_h.merge(result).to_json
|
238
280
|
rescue ArgumentError => e
|
239
281
|
invalid_configuration += 1
|
282
|
+
last_error = e.message
|
240
283
|
end
|
241
284
|
current_scenario += 1
|
242
285
|
end
|
@@ -9,7 +9,7 @@ module Evoc
|
|
9
9
|
# time: the time it took to generate the currently cached recommendation
|
10
10
|
# model_size: the number of transactions used when generating the currently cached recommendation
|
11
11
|
class << self
|
12
|
-
attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :
|
12
|
+
attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
|
13
13
|
end
|
14
14
|
|
15
15
|
def self.recommendation_cached?(algorithm:,
|
@@ -20,6 +20,7 @@ module Evoc
|
|
20
20
|
return self.tag == [algorithm,query,model_start,model_end,max_size].hash
|
21
21
|
end
|
22
22
|
|
23
|
+
|
23
24
|
def self.get_recommendation(algorithm:,
|
24
25
|
query:,
|
25
26
|
model_start:,
|
@@ -75,14 +76,13 @@ module Evoc
|
|
75
76
|
# @param [Array<String>] measure_combinations the list of measures to use when sorting a recommendation before evaluating
|
76
77
|
#
|
77
78
|
# @return [Hash[aggregator][evaluator][result]] the hash of results
|
78
|
-
def self.
|
79
|
+
def self.evaluate_last(evaluators: ,top_k: nil, unique_consequents: nil,expected_outcome:,measure_combination: )
|
79
80
|
if !self.last_recommendation.nil?
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
self.time_evaluation = TimeDifference.between(t1,t2).in_seconds.round(8)
|
81
|
+
self.evaluation = self.last_recommendation.evaluate_with(evaluators: evaluators,
|
82
|
+
top_k: top_k,
|
83
|
+
unique_consequents: unique_consequents,
|
84
|
+
expected_outcome: expected_outcome,
|
85
|
+
measure_combination: measure_combination)
|
86
86
|
else
|
87
87
|
STDERR.puts "TAG = #{self.tag}No recommendation to evaluate"
|
88
88
|
end
|
@@ -94,10 +94,7 @@ module Evoc
|
|
94
94
|
# time: 'execution time',
|
95
95
|
# filtered_model_size:
|
96
96
|
# number_of_rules :
|
97
|
-
#
|
98
|
-
# average_precision: ..,
|
99
|
-
# ..next evaluator..
|
100
|
-
# }
|
97
|
+
# average_precision:
|
101
98
|
# rules: [
|
102
99
|
# {
|
103
100
|
# lhs: [lhs]
|
@@ -118,7 +115,6 @@ module Evoc
|
|
118
115
|
recommendation_hash[:time_rulegeneration] = self.time_rulegeneration
|
119
116
|
recommendation_hash[:time_measurecalculation] = self.time_measurecalculation
|
120
117
|
recommendation_hash[:time_aggregation] = self.time_aggregation
|
121
|
-
recommendation_hash[:time_evaluation] = self.time_evaluation
|
122
118
|
recommendation_hash[:filtered_model_size] = self.filtered_model_size
|
123
119
|
recommendation_hash[:number_of_baserules] = self.base_recommendation.size
|
124
120
|
recommendation_hash[:number_of_rules] = self.last_recommendation.size
|
@@ -128,10 +124,11 @@ module Evoc
|
|
128
124
|
sum + r.get_measure('m_hyper_coefficient').value } / self.last_recommendation.size
|
129
125
|
recommendation_hash[:largest_antecedent] = self.last_recommendation.largest_antecedent
|
130
126
|
if !self.evaluation.nil?
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
127
|
+
self.evaluation.each do |evaluator,results|
|
128
|
+
recommendation_hash[evaluator] = results['value']
|
129
|
+
# time can also be added like this:
|
130
|
+
# recommendation_hash[evaluator+'_time'] = results['time']
|
131
|
+
end
|
135
132
|
end
|
136
133
|
recommendation_hash[:rules] = []
|
137
134
|
self.last_recommendation.each do |rule|
|
data/lib/evoc/rule.rb
CHANGED
@@ -21,15 +21,19 @@ module Evoc
|
|
21
21
|
name
|
22
22
|
end
|
23
23
|
|
24
|
+
# The rule as "<human_lhs> -> <human_rhs>".
def human_name
  antecedent_names = human_lhs
  consequent_names = human_rhs
  "#{antecedent_names} -> #{consequent_names}"
end
|
27
|
+
|
24
28
|
# The antecedent items translated by tx_store.ints2names and joined with
# commas; nil when the rule has no tx_store.
def human_lhs
  return nil if self.tx_store.nil?
  item_ids = self.lhs.map(&:to_i)
  self.tx_store.ints2names(item_ids).join(',')
end
|
29
33
|
|
30
34
|
# The consequent items translated by tx_store.ints2names and joined with
# commas; nil when the rule has no tx_store.
def human_rhs
  return nil if self.tx_store.nil?
  item_ids = self.rhs.map(&:to_i)
  self.tx_store.ints2names(item_ids).join(',')
end
|
35
39
|
|
data/lib/evoc/rule_store.rb
CHANGED
@@ -116,6 +116,33 @@ module Evoc
|
|
116
116
|
end
|
117
117
|
|
118
118
|
|
119
|
+
# Needed by Evaluate mixin
#
# Turn this recommendation into a clustered relevance list. The rules are
# deduplicated with unique_by(measures.first) and sorted with sort_on; each
# rule becomes 1 when its whole rhs is contained in +expected_outcome+ and 0
# otherwise. Consecutive rules with the same weight tag (the measure values
# joined by '_', "INF" for a nil value) share a cluster.
#
# @param measures [Array] the measures to rank on
# @param expected_outcome [Array] the items counted as relevant
# @return [Array<Array<Integer>>] clusters of 0s and 1s, in rank order
def evaluation_format(measures:, expected_outcome:)
  tagged = []
  # sort and filter out duplicate consequents
  self.sort_on(measures: measures, rules: self.unique_by(measures.first)).each do |r|
    hit = (r.rhs - expected_outcome).empty? ? 1 : 0
    tag = measures.map {|m| r.get_measure(m).value.nil? ? "INF" : r.get_measure(m).to_s}.join('_')
    tagged << [tag, hit]
  end
  # start a new cluster whenever the weight tag changes
  runs = tagged.slice_when {|left, right| left.first != right.first}
  runs.map {|run| run.map(&:last)}
end
|
145
|
+
|
119
146
|
##
|
120
147
|
# Evaluate this recommendation using the given evaluator
|
121
148
|
#
|
@@ -126,27 +153,35 @@ module Evoc
|
|
126
153
|
# @param [String] evaluator the method to use for evaluating
|
127
154
|
# @param [Array] expected_outcome the list of items to evaluate against
|
128
155
|
# @param [Array] measure_combination the list of measures used to first sort the recommendation
|
129
|
-
def evaluate_with(
|
156
|
+
def evaluate_with(evaluators:,expected_outcome:,measure_combination:,top_k: nil,unique_consequents: nil)
|
130
157
|
if measure_combination.empty? then raise ArgumentError, "Cannot evalute a recommendation without specifying which measures to rank on" end
|
131
|
-
|
132
|
-
logger.debug "#{__method__} params: evaluator: #{evaluator}, measure_combination: #{measure_combination}"
|
158
|
+
logger.debug "#{__method__} params: evaluators: #{evaluators}, measure_combination: #{measure_combination}"
|
133
159
|
# sort the rules on each combination and evaluate
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
# get the strongest unique rules
|
142
|
-
unique_rules = self.unique_by(measure_combination.first)
|
143
|
-
sorted_rules = self.sort_on(rules: unique_rules,measures: measure_combination)
|
144
|
-
end
|
145
|
-
# get the recommended items
|
146
|
-
recommendation = sorted_rules.map(&:rhs)
|
160
|
+
# if !top_k.nil?
|
161
|
+
# raise ArgumentError, "Top K must be a number" unless top_k.is_a?(Numeric)
|
162
|
+
# sorted_rules = sorted_rules.take(top_k)
|
163
|
+
# end
|
164
|
+
# convert rules into format used in evaluation
|
165
|
+
# map to 0/1 list where 1 is a correct item and 0 is not
|
166
|
+
# second item in each tuple gives the weight of the rule
|
147
167
|
# evaluate the sorted list against the expected outcome
|
148
|
-
|
149
|
-
|
168
|
+
recommendation = self.evaluation_format(measures: measure_combination, expected_outcome: expected_outcome)
|
169
|
+
potential_params = {rec: recommendation, exp: expected_outcome.size, rules: self}
|
170
|
+
results = Hash.new
|
171
|
+
evaluators.each do |evaluator|
|
172
|
+
t1 = Time.new
|
173
|
+
if Evoc::Evaluate.respond_to?(evaluator)
|
174
|
+
results[evaluator] = Hash.new
|
175
|
+
method_params = Evoc::Evaluate.method(evaluator).parameters.map(&:second)
|
176
|
+
params = potential_params.select {|k,v| method_params.include?(k)}
|
177
|
+
results[evaluator]['value'] = Evoc::Evaluate.method(evaluator).call(params)
|
178
|
+
else
|
179
|
+
raise NoMethodError, "The evaluator you requested (#{evaluator}) has not been implemented in Evoc::Evaluate"
|
180
|
+
end
|
181
|
+
t2 = Time.new
|
182
|
+
results[evaluator]['time'] = TimeDifference.between(t1,t2).in_seconds.round(8)
|
183
|
+
end
|
184
|
+
return results
|
150
185
|
end
|
151
186
|
|
152
187
|
##
|
@@ -163,9 +198,9 @@ module Evoc
|
|
163
198
|
# where each consequent is the strongest given by the input measure
|
164
199
|
#
|
165
200
|
# @param: [String] measure the measure used to find the strongest rules
|
166
|
-
def unique_by(measure)
|
201
|
+
def unique_by(measure, rules: self)
|
167
202
|
selected_rules = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new
|
168
|
-
|
203
|
+
rules.each do |rule|
|
169
204
|
if !rule.get_measure(measure).value.nil?
|
170
205
|
key = rule.rhs.first
|
171
206
|
if selected_rules[key].nil?
|
@@ -223,7 +258,7 @@ module Evoc
|
|
223
258
|
csv << ['rule'] + defined_measures
|
224
259
|
self.each do |rule|
|
225
260
|
row = CSV::Row.new([],[],false)
|
226
|
-
row << rule.
|
261
|
+
row << rule.human_name
|
227
262
|
defined_measures.each do |m|
|
228
263
|
row << rule.get_measure(m).value
|
229
264
|
end
|
@@ -287,11 +322,9 @@ module Evoc
|
|
287
322
|
end
|
288
323
|
|
289
324
|
def ==other
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
self.map {|r| r.get_measure(m)} == other.map {|r| r.get_measure(m)}
|
294
|
-
end
|
325
|
+
self_rules = self.rules.sort_by {|r| r.name}.map {|r| "#{r.name}#{r.instantiated_measures.map {|m| r.get_measure(m).value}}"}
|
326
|
+
other_rules = other.rules.sort_by {|r| r.name}.map {|r| "#{r.name}#{r.instantiated_measures.map {|m| r.get_measure(m).value}}"}
|
327
|
+
self_rules == other_rules
|
295
328
|
end
|
296
329
|
|
297
330
|
def size
|
data/lib/evoc/scenario.rb
CHANGED
@@ -53,30 +53,6 @@ module Evoc
|
|
53
53
|
comparison
|
54
54
|
end
|
55
55
|
|
56
|
-
##
|
57
|
-
# Executes a query given the current parameters
|
58
|
-
# This results in a set of association rules, i.e., a recommendation
|
59
|
-
#
|
60
|
-
# Producing a recommendation is done through the following process:
|
61
|
-
#
|
62
|
-
# 1. Generate rules using a mining algorithm on the specified history
|
63
|
-
# 2. Calculate interestingness measures on the generated rules
|
64
|
-
# (optional) 3. Aggregate rules to further improve recommendation
|
65
|
-
# (optional) 4. Evaluate how good the recommendation is
|
66
|
-
#
|
67
|
-
# @return [Hash] containing the query + scenario + recommendation + other metadata
|
68
|
-
def call(evaluators: [])
|
69
|
-
#generate recommendation in cache (generate rules + measures on rules)
|
70
|
-
self.recommendation
|
71
|
-
|
72
|
-
# evaluate if requested
|
73
|
-
if !evaluators.empty?
|
74
|
-
Evoc::RecommendationCache.evaluate(evaluators: evaluators,expected_outcome: self.expected_outcome,measure_combination: self.measures)
|
75
|
-
end
|
76
|
-
# build return hash
|
77
|
-
recommendation = Evoc::RecommendationCache.to_h(measures: self.measures)
|
78
|
-
return self.to_h.merge(recommendation)
|
79
|
-
end
|
80
56
|
|
81
57
|
def to_h
|
82
58
|
fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
|
@@ -88,16 +64,6 @@ module Evoc
|
|
88
64
|
return hash
|
89
65
|
end
|
90
66
|
|
91
|
-
def recommendation
|
92
|
-
Evoc::RecommendationCache.get_recommendation(algorithm: self.algorithm,
|
93
|
-
query: self.query,
|
94
|
-
model_start: self.model_start,
|
95
|
-
model_end: self.model_end,
|
96
|
-
max_size: self.max_size,
|
97
|
-
aggregator: self.aggregator,
|
98
|
-
measures: self.measures)
|
99
|
-
end
|
100
|
-
|
101
67
|
def recommendation?
|
102
68
|
Evoc::RecommendationCache.recommendation_cached?(algorithm: self.algorithm,
|
103
69
|
query: self.query,
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Extending the rubytree gem with some additional methods
|
2
|
+
# see: http://rubytree.anupamsg.me/
|
3
|
+
module Tree
|
4
|
+
class TreeNode
|
5
|
+
|
6
|
+
##
|
7
|
+
# @return the right siblings of the current node
|
8
|
+
def right_siblings
|
9
|
+
if self.is_last_sibling?
|
10
|
+
return []
|
11
|
+
else
|
12
|
+
return [self.next_sibling] + self.next_sibling.right_siblings
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# @return the left siblings of the current node
|
18
|
+
def left_siblings
|
19
|
+
if self.is_first_sibling?
|
20
|
+
return []
|
21
|
+
else
|
22
|
+
return [self.previous_sibling] + self.previous_sibling.left_siblings
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/evoc/tx_store.rb
CHANGED
data/lib/evoc/util.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
|
2
|
+
module Evoc
|
3
|
+
module Util
|
4
|
+
# helper function to generate a lattice so we can easily come up with tests for the closed rules mining
|
5
|
+
# example nodes: [['a',[1,2]],['b',[2,3]],['c',[1,2,3]]]
|
6
|
+
# first elem is item name
|
7
|
+
# second elem is the txes where this item changes
|
8
|
+
def self.lattice(nodes,filter: nil)
|
9
|
+
(1..nodes.size).each do |n|
|
10
|
+
nodes.combination(n).each do |comb|
|
11
|
+
# [['a',[1,2]],['b',[2,3]]]
|
12
|
+
union = comb.map(&:first).join(',')
|
13
|
+
frequency = comb.map(&:second).inject(&:&).size
|
14
|
+
if filter =~ union
|
15
|
+
if frequency > 0
|
16
|
+
printf("%#{nodes.size*2}s",[union,frequency].join(':'))
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
puts
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# helper function for generating a txstore from the following format
|
25
|
+
# [['a',[1,2]],['b',[2,3]],['c',[1,2,3]]]
|
26
|
+
# (same structure as used for lattice creation)
|
27
|
+
def self.nodes2txstore(nodes)
|
28
|
+
txes = nodes.map(&:second).inject(&:|)
|
29
|
+
store = Evoc::TxStore.new
|
30
|
+
txes.each do |id|
|
31
|
+
items = nodes.select {|n| n.second.include?(id)}.map(&:first)
|
32
|
+
store << Evoc::Tx.new(id: id, items: items)
|
33
|
+
end
|
34
|
+
return(store)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/evoc/version.rb
CHANGED
data/lib/evoc_cli/experiment.rb
CHANGED
@@ -1,10 +1,20 @@
|
|
1
1
|
require_relative 'cli_helper'
|
2
|
+
# override printing of help text as the default does not respect spaces and adds newlines
|
3
|
+
class Thor
|
4
|
+
module Shell
|
5
|
+
class Basic
|
6
|
+
def print_wrapped(message, options = {})
|
7
|
+
stdout.puts message
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
2
12
|
|
3
13
|
module EvocCLI
|
4
14
|
class Experiment < Thor
|
5
15
|
class_option :case_id, type: :string, desc: "Specify case identifier."
|
6
16
|
class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
|
7
|
-
class_option :transactions, :aliases => '-t', :type => :string, :
|
17
|
+
class_option :transactions, :aliases => '-t', :type => :string, :desc => "Path to change-history"
|
8
18
|
class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
|
9
19
|
|
10
20
|
##
|
@@ -35,6 +45,7 @@ module EvocCLI
|
|
35
45
|
desc: "Percentage of items to select for each query"
|
36
46
|
method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
|
37
47
|
method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
|
48
|
+
method_option :write_dict, type: :string, desc: "Write an item dictionary to the provided file"
|
38
49
|
desc "generate_queries [options]", "Generate queries from <transactions>"
|
39
50
|
def generate_queries
|
40
51
|
#MemoryProfiler.start('create_queries',30)
|
@@ -59,7 +70,52 @@ module EvocCLI
|
|
59
70
|
desc: "DEPRECATED WILL HAVE NO EFFECT Number of query permutations/replications to produce."
|
60
71
|
method_option :fail_safe, type: :string, desc: "If the fail safe file exists, safely exit."
|
61
72
|
method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
|
73
|
+
method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
|
74
|
+
method_option :top_k, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER an evential unique consequents filter"
|
62
75
|
desc "execute_scenarios [options]",""
|
76
|
+
long_desc <<-LONGDESC
|
77
|
+
keyword description
|
78
|
+
------- -----------
|
79
|
+
|
80
|
+
case_id: user provided tag for the history used
|
81
|
+
granularity: granularity of the history used
|
82
|
+
scenario_id: a unique identifier for this scenario
|
83
|
+
tx_id: the sha of the commit that the query was sampled from
|
84
|
+
tx_index: the index of this transaction in the used history (0 is oldest)
|
85
|
+
tx_size: the number of items in the transaction
|
86
|
+
query_size: the number of items in the query
|
87
|
+
query_percentage: query_size/tx_size
|
88
|
+
expected_outcome_size: tx - query
|
89
|
+
model_size: number of previous transactions relative to this one
|
90
|
+
model_hours: time span from the first transaction to this one
|
91
|
+
model_age: number of transactions between end of model and this transaction
|
92
|
+
max_size: transactions larger than this are filtered out before generating rules
|
93
|
+
filtered_model_size: model size after the max_size filtering
|
94
|
+
algorithm: the mining algorithm used to generate the recommendation
|
95
|
+
aggregator: the aggregation function used to aggregate the rules of the recommendation
|
96
|
+
measures: the interestingness measures used to rank each rule
|
97
|
+
recommendation_tag: a unique identifier of the rules used as a basis for the recommendation
|
98
|
+
time_rulegeneration: how long it took to generate the rules
|
99
|
+
time_measurecalculation: how long it took to calculate the measures for each rule
|
100
|
+
time_aggregation: how long it took to aggregate the rules
|
101
|
+
number_of_baserules: number of rules before aggregation
|
102
|
+
number_of_rules: number of rules after aggregation (equal to number_of_baserules when not aggregating)
|
103
|
+
number_of_hyperrules: number of hyper rules after aggregating
|
104
|
+
mean_hyper_coefficient: average number of rules aggregated in each hyper rule
|
105
|
+
largest_antecedent: number of items in the largest antecedent (lhs of rule)
|
106
|
+
t_ap: average precision where ties are accounted for
|
107
|
+
ap: the average precision
|
108
|
+
precision: ratio of correct to incorrect items
|
109
|
+
precision10: ratio of correct to incorrect items in the top 10
|
110
|
+
recall: ratio of correct items in recommendation to full set of expected items
|
111
|
+
recall10: ratio of correct items in recommendation to full set of expected items in the top 10
|
112
|
+
mean_confidence: the average confidence of the rules in this recommendation
|
113
|
+
discernibility: the number of uniquely weighted rules to the number of rules
|
114
|
+
applicable: 1 if rules were generated, 0 otherwise
|
115
|
+
f1: the f1 measure
|
116
|
+
first_relevant: the rank of the first correct item
|
117
|
+
last_relevant: the rank of the last correct item
|
118
|
+
LONGDESC
|
63
119
|
def execute_scenarios
|
64
120
|
if !options[:permutation].nil?
|
65
121
|
STDERR.puts "Permutation option has been set, but the option is currently disabled and will have no effect"
|
data/lib/evoc_helper.rb
CHANGED
@@ -28,7 +28,10 @@ require 'logger'
|
|
28
28
|
require 'zip'
|
29
29
|
require 'zip/filesystem'
|
30
30
|
require 'set'
|
31
|
+
require 'tempfile'
|
32
|
+
require 'fileutils'
|
31
33
|
require 'algorithms' # various efficient data structures
|
34
|
+
require 'tree' #general purpose tree structure
|
32
35
|
require 'mathn' # enhances the Rational (and others) number type
|
33
36
|
Evoc::Env.load('google_hash',"please install to improve performance")
|
34
37
|
#Evoc::Env.load('nmatrix')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: evoc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Rolfsnes
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -150,6 +150,20 @@ dependencies:
|
|
150
150
|
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rubytree
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
153
167
|
description:
|
154
168
|
email:
|
155
169
|
- mail@thomasrolfsnes.com
|
@@ -172,12 +186,15 @@ files:
|
|
172
186
|
- evoc.gemspec
|
173
187
|
- lib/evoc.rb
|
174
188
|
- lib/evoc/algorithm.rb
|
189
|
+
- lib/evoc/algorithms/closed_rules.rb
|
175
190
|
- lib/evoc/algorithms/top_k.rb
|
176
191
|
- lib/evoc/analyze.rb
|
177
192
|
- lib/evoc/array.rb
|
178
193
|
- lib/evoc/evaluate.rb
|
179
194
|
- lib/evoc/exceptions/aggregation_error.rb
|
195
|
+
- lib/evoc/exceptions/configuration_error.rb
|
180
196
|
- lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb
|
197
|
+
- lib/evoc/exceptions/format_error.rb
|
181
198
|
- lib/evoc/exceptions/measure_calculation_error.rb
|
182
199
|
- lib/evoc/exceptions/no_changed_items_in_changes.rb
|
183
200
|
- lib/evoc/exceptions/no_changes_in_json_object.rb
|
@@ -206,8 +223,10 @@ files:
|
|
206
223
|
- lib/evoc/rule_store.rb
|
207
224
|
- lib/evoc/scenario.rb
|
208
225
|
- lib/evoc/svd.rb
|
226
|
+
- lib/evoc/tree/tree_node.rb
|
209
227
|
- lib/evoc/tx.rb
|
210
228
|
- lib/evoc/tx_store.rb
|
229
|
+
- lib/evoc/util.rb
|
211
230
|
- lib/evoc/version.rb
|
212
231
|
- lib/evoc_cli/analyze.rb
|
213
232
|
- lib/evoc_cli/cli_helper.rb
|