evoc 3.6.2 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/evoc.gemspec +1 -0
- data/lib/evoc/algorithm.rb +72 -5
- data/lib/evoc/algorithms/closed_rules.rb +145 -0
- data/lib/evoc/evaluate.rb +233 -58
- data/lib/evoc/exceptions/configuration_error.rb +6 -0
- data/lib/evoc/exceptions/format_error.rb +6 -0
- data/lib/evoc/experiment.rb +51 -8
- data/lib/evoc/recommendation_cache.rb +14 -17
- data/lib/evoc/rule.rb +8 -4
- data/lib/evoc/rule_store.rb +59 -26
- data/lib/evoc/scenario.rb +0 -34
- data/lib/evoc/tree/tree_node.rb +26 -0
- data/lib/evoc/tx_store.rb +8 -0
- data/lib/evoc/util.rb +37 -0
- data/lib/evoc/version.rb +1 -1
- data/lib/evoc_cli/experiment.rb +57 -1
- data/lib/evoc_helper.rb +3 -0
- metadata +21 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2b867c7a5e05b3c2be58b9dd361554a43f3f277c
|
4
|
+
data.tar.gz: 9d00091a6fd7685f048930889aaf252df5aa6634
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f89dbef20f735e0f8c6b8f7104ea8b38a4dd118707089b17acdf529591fd652478406047cb5d4b08e18afb746d3edf46dc7cf9ac7eb208d214f91e9050841c12
|
7
|
+
data.tar.gz: 7e1955af5653df5d7afd986e9d522178b32d10c26771833e4d37aec910dd93208de56c8a6f83bd21faeb8796b098da808f72a05c9f6e3caf1100064fd20d7bb0
|
data/.gitignore
CHANGED
data/evoc.gemspec
CHANGED
data/lib/evoc/algorithm.rb
CHANGED
@@ -20,8 +20,8 @@ module Evoc
|
|
20
20
|
Evoc::Algorithm.cached_rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
|
21
21
|
elsif match = /rule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
|
22
22
|
Evoc::Algorithm.rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
|
23
|
-
elsif Evoc::Algorithm.respond_to?(algorithm
|
24
|
-
Evoc::Algorithm.method(algorithm
|
23
|
+
elsif Evoc::Algorithm.respond_to?(algorithm)
|
24
|
+
Evoc::Algorithm.method(algorithm).call(tx_store:tx_store,query:query)
|
25
25
|
else raise ArgumentError.new, "#{algorithm} is not an available algorithm"
|
26
26
|
end
|
27
27
|
end
|
@@ -105,6 +105,70 @@ module Evoc
|
|
105
105
|
end
|
106
106
|
|
107
107
|
|
108
|
+
##
# Collect, in a trie, one entry per distinct (consequent, query subset) pair
# found in the history.
#
# For every transaction in which at least one query item changed, the
# antecedent is the part of the query that changed in that transaction and
# each remaining item of the transaction is a consequent. The trie key is
# the consequent followed by the antecedent items, the value is the
# consequent as a string.
#
# NOTE(review): the key is built without separators, so e.g. consequent 1
# with antecedent [12] and consequent 11 with antecedent [2] map to the same
# key. Confirm whether that is intended before relying on the trie contents.
#
# @param tx_store the history; must respond to #transactions_of_list and #get_tx
# @param [Array] query the items of the query
# @return [Containers::Trie] the populated trie (rule generation from the
#   trie is not implemented yet, so no rule store is returned)
def self.not_subsumed(tx_store:, query:)
  trie = Containers::Trie.new
  # initial filter, we only consider txes where something in the query changed
  tx_store.transactions_of_list(query).each do |tx_id|
    tx = tx_store.get_tx(id: tx_id, id_type: :index)
    # the subset of the query that changed in this tx
    antecedent = query & tx.items
    (tx.items - antecedent).each do |consequent|
      entry = "#{consequent}#{antecedent.join('')}"
      # only the first occurrence of an entry is stored
      trie.push(entry, consequent.to_s) if trie.get(entry).nil?
    end
  end
  trie
end
|
137
|
+
|
138
|
+
##
# Find the largest rules for each unique consequent
#
# Every transaction that touches the query contributes one candidate
# antecedent (the part of the query that changed in it) for each of its
# remaining items. Per consequent only the antecedents of maximal size are
# kept; ties are all kept.
#
# @param tx_store the history; must respond to #transactions_of_list and #get_tx
# @param [Array] query the items of the query
# @return [Evoc::RuleStore] one rule per (largest antecedent, consequent) pair
def self.largest_rules(tx_store:,query:)
  # consequent => Set of the largest antecedents seen so far (all equally large)
  largest = {}
  tx_store.transactions_of_list(query).each do |tx_index|
    tx = tx_store.get_tx(id: tx_index, id_type: :index)
    lhs = query & tx.items
    (tx.items - lhs).each do |rhs|
      known = largest[rhs]
      if known.nil? || lhs.size > known.first.size
        # first antecedent for this consequent, or a strictly larger one
        largest[rhs] = Set.new([lhs])
      elsif lhs.size == known.first.size
        known << lhs
      end
    end
  end
  # turn the surviving (antecedent, consequent) pairs into rules
  store = Evoc::RuleStore.new(query: query)
  largest.each do |rhs, lhs_candidates|
    lhs_candidates.each do |lhs|
      store << Evoc::Rule.new(lhs: lhs, rhs: rhs, tx_store: tx_store)
    end
  end
  store
end
|
170
|
+
|
171
|
+
|
108
172
|
##
|
109
173
|
# TARMAQ
|
110
174
|
# find largest subsets in @query with evidence in @tx_store version
|
@@ -113,7 +177,6 @@ module Evoc
|
|
113
177
|
#initial filter, we consider all txes where something in the query changed
|
114
178
|
query_changed_in = tx_store.transactions_of_list(query)
|
115
179
|
# now find what subsets of the query changed in each tx
|
116
|
-
rules = Hash.new
|
117
180
|
query_changed_in.each do |tx_id|
|
118
181
|
tx = tx_store.get_tx(id:tx_id,id_type: :index)
|
119
182
|
largest_match_in_query = (query & tx.items)
|
@@ -134,14 +197,18 @@ module Evoc
|
|
134
197
|
###
|
135
198
|
## rose
|
136
199
|
###
|
137
|
-
def self.
|
200
|
+
# Delegates to cached_rule_range with both the minimum and the maximum set to
# the size of the query.
#
# @param tx_store the history to mine
# @param [Array] query the items of the query
# @return whatever cached_rule_range returns for that range
def self.rose(tx_store:,query:)
  query_size = query.size
  cached_rule_range(query_size, query_size, tx_store: tx_store, query: query)
end
|
141
204
|
|
142
|
-
def self.
|
205
|
+
# Delegates to cached_rule_range with both the minimum and the maximum set
# to 1.
#
# @param tx_store the history to mine
# @param [Array] query the items of the query
# @return whatever cached_rule_range returns for that range
def self.co_change(tx_store:, query:)
  single_item = 1
  cached_rule_range(single_item, single_item, tx_store: tx_store, query: query)
end
|
145
208
|
|
209
|
+
# Entry point for the closed rules algorithm; the work is done by
# Evoc::ClosedRules.closed_rules.
#
# @param tx_store the history to mine
# @param [Array] query the items of the query
# @return whatever Evoc::ClosedRules.closed_rules returns
def self.closed_rules(tx_store:, query:)
  miner = Evoc::ClosedRules
  miner.closed_rules(tx_store: tx_store, query: query)
end
|
212
|
+
|
146
213
|
end # Algorithm
|
147
214
|
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
module Evoc
|
2
|
+
class ClosedRules
|
3
|
+
# Mine closed rules for +query+.
#
# A tree with one child per consequent is built first (initialize_tree).
# Each consequent subtree is then extended (extend_nodes), which yields a
# hash of frequency => closed item sets. Every closed set becomes a rule
# whose antecedent is the set minus the consequent and whose support is
# frequency / tx_store.size.
#
# @param tx_store the history to mine
# @param [Array] query the items of the query
# @return [Evoc::RuleStore] the closed rules
def self.closed_rules(tx_store:,query:)
  # create initial tree, one subtree per consequent
  tree = initialize_tree(tx_store, query)
  store = Evoc::RuleStore.new(query: query)
  tree.children.each do |consequent_node|
    extend_nodes(consequent_node).each do |frequency, closed_sets|
      closed_sets.each do |closed_set|
        # node names are arrays of strings, rules want integers
        lhs = (closed_set - consequent_node.name).map(&:to_i)
        rhs = consequent_node.name.map(&:to_i)
        support = frequency.to_r / tx_store.size
        store << Evoc::Rule.new(lhs: lhs, rhs: rhs, tx_store: tx_store, m_support: support)
      end
    end
  end
  store
end
|
20
|
+
|
21
|
+
private
|
22
|
+
# Build the initial candidate tree.
#
# The root gets one child per consequent (an item that changed together with
# something in the query); the child is named [consequent] and its content is
# the transactions of that consequent. Each consequent node gets one child
# per query item it changed with, named [item, consequent], whose content is
# the transactions shared by the item and the consequent. Names are arrays of
# strings.
#
# NOTE: this method sits below a bare `private`, which does not affect
# methods defined with `def self.`; it is callable from outside the class.
#
# @param tx_store the history; must respond to #transactions_of_list,
#   #get_tx and #transactions_of
# @param [Array] query the items of the query
# @return [Tree::TreeNode] the root of the tree
def self.initialize_tree(tx_store, query)
  root = Tree::TreeNode.new([])
  # only txes where something in the query changed are of interest
  tx_store.transactions_of_list(query).each do |tx_index|
    tx = tx_store.get_tx(id: tx_index, id_type: :index)
    in_query = query & tx.items
    (tx.items - in_query).each do |rhs|
      rhs_name = [rhs.to_s]
      # first time we see this consequent: initialize its candidate node
      root << Tree::TreeNode.new([rhs.to_s], tx_store.transactions_of(rhs)) if root[rhs_name].nil?
      rhs_txes = root[rhs_name].content
      in_query.each do |item|
        pair = [item.to_s, rhs.to_s]
        next unless root[rhs_name][pair].nil?
        shared_txes = tx_store.transactions_of(item) & rhs_txes
        root[rhs_name] << Tree::TreeNode.new(pair, shared_txes)
      end
    end
  end
  root
end
|
51
|
+
|
52
|
+
# Extend the children of +root+ pairwise and collect closed item sets.
#
# Each child A is compared with every sibling B to its right, based on the
# transactions stored in their content (only when they share at least one):
#
# * same transactions:   B is removed, A and its subtree are renamed to
#                        include B's items
# * A's txes within B's: A and its subtree are renamed to include B's items
# * B's txes within A's: B is removed, the union becomes a new child of A
# * otherwise:           the union becomes a new child of A
#
# After the siblings have been processed the method recurses into A's
# children, and A is then recorded under its frequency (content size) unless
# a set already recorded with that frequency contains all of A's items.
#
# @param root the node whose children should be extended
# @param [Hash] closed_rules accumulator, frequency => list of item sets;
#   shared between the recursive calls
# @return [Hash] the accumulator
def self.extend_nodes(root,closed_rules: {})
  a = root.first_child
  until a.nil?
    b = a.next_sibling
    until b.nil?
      union = a.name | b.name
      txes_a = a.content
      txes_b = b.content
      shared = txes_a & txes_b
      if shared.size > 0
        relation = compare(txes_a, txes_b)
        if relation == 'EQUAL' || relation == 'B_IN_A'
          # B goes away; step back to its predecessor first so that the
          # next_sibling call below continues from B's old position
          predecessor = b.previous_sibling
          root.remove!(b)
          b = predecessor
        end
        case relation
        when 'EQUAL', 'A_IN_B'
          a.each { |node| node.rename(union | node.name) }
        when 'B_IN_A', 'NOT_EQUAL'
          a << Tree::TreeNode.new(union, shared)
        end
      end
      b = b.next_sibling
    end

    # traverse down before A itself is considered as a closed rule
    extend_nodes(a, closed_rules: closed_rules) unless a.children.empty?

    # add node as closed rule if not subsumed by another rule already added
    frequency = a.content.size
    item_set = a.name
    if closed_rules[frequency].nil?
      closed_rules[frequency] = [item_set]
    elsif closed_rules[frequency].none? { |closed| (item_set - closed).empty? }
      closed_rules[frequency] << item_set
    end

    a = a.next_sibling
  end
  closed_rules
end
|
124
|
+
|
125
|
+
# Classify how two lists relate to each other. The first matching case wins:
#
# 1. 'EQUAL'     - a == b (for arrays this includes element order)
# 2. 'A_IN_B'    - every element of a is in b
# 3. 'B_IN_A'    - every element of b is in a
# 4. 'NOT_EQUAL' - each list has an element the other lacks
#
# @param a first list (must support #- and #==)
# @param b second list
# @return [String] one of 'EQUAL', 'A_IN_B', 'B_IN_A', 'NOT_EQUAL'
def self.compare(a,b)
  return 'EQUAL' if a == b
  return 'A_IN_B' if (a - b).empty?
  return 'B_IN_A' if (b - a).empty?

  'NOT_EQUAL'
end
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
data/lib/evoc/evaluate.rb
CHANGED
@@ -1,42 +1,251 @@
|
|
1
1
|
module Evoc
|
2
|
-
|
2
|
+
module Evaluate
|
3
3
|
extend Logging
|
4
4
|
|
5
|
+
# Sanity check of the clustered recommendation format used by the
# evaluators: a list of lists of 0s and 1s.
#
# Only the outer object, its first element and that element's first item are
# inspected; the remaining clusters and items are not checked.
#
# @param input the object to check
# @return [nil] when the input passes the check
# @raise [Evoc::Exceptions::FormatError] when the check fails
def self.validateInput(input)
  looks_valid = input.is_a?(Array) &&              # an array
                input.first.is_a?(Array) &&        # containing an array
                [0,1].include?(input.first.first)  # whose items are 0s and 1s
  return if looks_valid

  raise Evoc::Exceptions::FormatError.new "Wrong format given to #{__method__}, expected list of list of 0s and 1s, input was: #{input}"
end
|
5
13
|
|
6
|
-
def self.
|
7
|
-
if
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
14
|
+
# Mean of the confidence values of the given rules.
#
# The division is done with #fdiv so that the result is not truncated when
# the confidence values happen to be Integers.
#
# @param rules a collection of rules responding to #m_confidence, whose
#   result responds to #value
# @return [Float, nil] the mean confidence, nil when there are no rules
def self.mean_confidence(rules:)
  return nil if rules.empty?

  total = rules.inject(0) { |sum, r| sum + r.m_confidence.value }
  total.fdiv(rules.size)
end
|
18
|
+
|
19
|
+
# Ratio between the number of clusters and the number of items in a
# clustered recommendation: 1.0 when every item has its own cluster, closer
# to 0 the more items share a cluster.
#
# @param [Array<Array>] rec the clustered recommendation (list of lists of
#   0s and 1s)
# @return [Float, nil] clusters / items, nil for the empty recommendation
# @raise [Evoc::Exceptions::FormatError] (from validateInput) on malformed input
def self.discernibility(rec:)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  rec_clusters = rec.size
  rec_size = rec.inject(0) { |sum, cluster| sum + cluster.size }
  # fdiv: plain Integer division would truncate the ratio to 0 or 1
  rec_clusters.fdiv(rec_size)
end
|
18
38
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
39
|
+
# Whether a recommendation was produced at all.
#
# @param [Array] rec the recommendation
# @return [Integer] 0 for the empty array, 1 for any non-empty array
# @raise [Evoc::Exceptions::FormatError] when rec is not an array
def self.applicable(rec:)
  unless rec.is_a?(Array)
    raise Evoc::Exceptions::FormatError.new "Wrong format given to #{__method__}, expected an array, input was: #{rec}"
  end

  # comparing with the empty array yields 0 when rec is empty and 1 otherwise
  (rec <=> []).abs
end
|
46
|
+
|
47
|
+
##
# Computes 2 * correct / (size of the recommendation + exp), where correct
# is the number of 1s in the recommendation.
#
# @param [Array<Array>] rec the clustered recommendation (list of lists of
#   0s and 1s)
# @param [Integer] exp the number added to the recommendation size in the
#   denominator (presumably the number of expected/relevant items - confirm
#   against the caller); required
# @return [Float, nil] the f1 score (precision/recall harmonic mean), nil
#   for the empty recommendation
# @raise [Evoc::Exceptions::FormatError] (from validateInput) on malformed input
def self.f1(rec:,exp:)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  rec_size = rec.inject(0) { |sum, cluster| sum + cluster.size }
  rec_correct = rec.inject(0) { |sum, cluster| sum + cluster.inject(0, :+) }
  # fdiv: plain Integer division would truncate the score to 0 or 1
  (2 * rec_correct).fdiv(rec_size + exp)
end
|
67
|
+
|
68
|
+
|
69
|
+
##
# Finds the rank of the first relevant item. Ranks are counted from 1 across
# all clusters, in order.
#
# @param [Array<Array>] rec the clustered recommendation (list of lists of
#   0s and 1s)
# @return [Integer, nil] the rank of the first 1, nil when there is none or
#   when the recommendation is empty
def self.first_relevant(rec:)
  # nothing to rank in the empty list
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  rank = 0
  rec.each do |cluster|
    cluster.each do |relevance|
      rank += 1
      return rank if relevance == 1
    end
  end
  nil
end
|
33
89
|
|
90
|
+
##
# Finds the rank of the last relevant item. Ranks are counted from 1 across
# all clusters, in order.
#
# @param [Array<Array>] rec the clustered recommendation (list of lists of
#   0s and 1s)
# @return [Integer, nil] the rank of the last 1, nil when there is none or
#   when the recommendation is empty
def self.last_relevant(rec:)
  # nothing to rank in the empty list
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  # walk backwards from the very last rank
  rank = rec.inject(0) { |total, cluster| total + cluster.size }
  rec.reverse_each do |cluster|
    cluster.reverse_each do |relevance|
      return rank if relevance == 1
      rank -= 1
    end
  end
  nil
end
|
111
|
+
|
112
|
+
# Recall restricted to the top of the recommendation: the first ten clusters
# are flattened, the first ten items of that list are kept and handed to
# recall as a single cluster.
#
# @param [Array<Array>] rec the clustered recommendation (list of lists of
#   0s and 1s)
# @param exp passed on to recall unchanged
# @return the result of recall on the truncated recommendation, nil for the
#   empty recommendation
def self.recall10(rec:,exp: nil)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)
  top_items = rec.take(10).flatten.take(10)
  self.recall(rec: [top_items], exp: exp)
end
|
120
|
+
|
121
|
+
# Precision restricted to the top of the recommendation: the first ten
# clusters are flattened, the first ten items of that list are kept and
# handed to precision as a single cluster.
#
# @param [Array<Array>] rec the clustered recommendation (list of lists of
#   0s and 1s)
# @param exp accepted for a uniform evaluator signature; not used
# @return the result of precision on the truncated recommendation, nil for
#   the empty recommendation
def self.precision10(rec:,exp: nil)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)
  top_items = rec.take(10).flatten.take(10)
  self.precision(rec: [top_items])
end
|
129
|
+
|
130
|
+
# Fraction of the recommended items that are relevant.
#
# @param [Array<Array>] rec the clustered recommendation (list of lists of
#   0s and 1s)
# @param exp accepted for a uniform evaluator signature; not used
# @return [Float, nil] number of 1s / number of items, nil for the empty
#   recommendation
# @raise [Evoc::Exceptions::FormatError] (from validateInput) on malformed input
def self.precision(rec:,exp: nil)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  size_rec = rec.inject(0) { |sum, cluster| sum + cluster.size }
  # inject(0, :+) also copes with an empty cluster
  num_correct_in_rec = rec.inject(0) { |sum, cluster| sum + cluster.inject(0, :+) }

  # fdiv: plain Integer division would truncate the fraction to 0 or 1
  num_correct_in_rec.fdiv(size_rec)
end
|
142
|
+
|
143
|
+
# Fraction of the relevant items that were recommended.
#
# @param [Array<Array>] rec the clustered recommendation (list of lists of
#   0s and 1s)
# @param [Integer, nil] exp the number of relevant items; when nil the plain
#   count of relevant items in the recommendation is returned instead
# @return [Float, Integer, nil] number of 1s / exp, the number of 1s when
#   exp is nil, nil for the empty recommendation
# @raise [ArgumentError] when the recommendation holds more relevant items
#   than exp
# @raise [Evoc::Exceptions::FormatError] (from validateInput) on malformed input
def self.recall(rec:,exp: nil)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  # inject(0, :+) also copes with an empty cluster
  num_correct_in_rec = rec.inject(0) { |sum, cluster| sum + cluster.inject(0, :+) }
  return num_correct_in_rec if exp.nil?

  if num_correct_in_rec > exp
    raise ArgumentError, "Found more relevant items than the provided number of relevant items"
  end

  # exact Rational division: plain Integer division would truncate the
  # fraction to 0 or 1 (an Integer exp of 0 still raises ZeroDivisionError)
  (num_correct_in_rec.to_r / exp).to_f
end
|
161
|
+
# Average precision for a recommendation with ties.
#
# The clustered recommendation is expected to be a sorted list V
# where V = [V1,V2,..Vn]
# and Vi is a cluster of items with the same weight like [rel_1,rel_2,..,rel_n]
# where rel_i is 1 if the item is relevant and 0 if not.
#
# Every position in a cluster contributes
#   (chance that the item at this position is relevant) *
#   (number of relevant items at or before this position, given that this one is relevant) / position
#
# r_p : relevant items in previous groups
# i_p : index of the last item of the previous group
# r_g : relevant items in group
# n_g : items in group
# i   : index of current item (1-based, across all groups)
#
# All intermediate values are Rationals so that no division is truncated.
#
# @param [Array<Array>] rec the clustered recommendation
# @param [Integer, nil] exp the number of relevant items; when nil the
#   recommendation is assumed to contain all relevant items
# @return [Float, Integer, nil] the average precision, 0 when the
#   recommendation holds no relevant item, nil for the empty recommendation
# @raise [ArgumentError] when the recommendation holds more relevant items
#   than exp
# @raise [Evoc::Exceptions::FormatError] (from validateInput) on malformed input
def self.t_ap(rec:,exp: nil)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  ap = 0
  r_p = 0
  i_p = 0
  rec.each do |cluster|
    r_g = cluster.inject(0, :+).to_r
    n_g = cluster.size.to_r
    cluster.each_index do |offset|
      i = i_p + offset + 1
      chance_relevant = r_g / n_g
      relevant_up_to_i = if n_g == 1
                           r_p + 1
                         else
                           # the other (i - i_p - 1) items placed before this
                           # one in the group are relevant with chance
                           # (r_g-1)/(n_g-1)
                           r_p + (i - i_p - 1) * ((r_g - 1) / (n_g - 1)) + 1
                         end
      # chance_relevant is a Rational, so dividing by i is exact even while
      # i is still an Integer (first group)
      ap += chance_relevant * relevant_up_to_i / i
    end
    r_p += r_g
    i_p += n_g
  end
  # if the number of relevant documents is not supplied
  # assume that the recommendation contains all relevant documents
  if exp.nil?
    exp = r_p
  elsif r_p > exp
    raise ArgumentError, "Found more relevant items than the provided number of relevant items"
  end
  r_p == 0 ? 0 : (ap / exp).to_f
end
|
211
|
+
|
212
|
+
# Average precision of a recommendation; clusters are only used for their
# order, i.e. the items are evaluated in the order they are listed.
#
# @param [Array<Array>] rec the clustered recommendation (list of lists of
#   0s and 1s)
# @param [Integer, nil] exp the number of relevant items; when nil the
#   recommendation is assumed to contain all relevant items
# @return [Float, Integer, nil] the average precision, 0 when exp (given or
#   derived) is 0, nil for the empty recommendation
# @raise [ArgumentError] when the recommendation holds more relevant items
#   than exp
# @raise [Evoc::Exceptions::FormatError] (from validateInput) on malformed input
def self.ap(rec:,exp: nil)
  # undefined for the empty list
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  i = 0
  correct_i = 0
  ap = 0

  rec.each do |cluster|
    cluster.each do |item|
      i += 1
      correct_i += item
      # exact precision at rank i; plain Integer division would truncate it
      # to 0 as soon as one earlier item was not relevant
      precision_i = correct_i.to_r / i
      # only ranks holding a relevant item contribute
      ap += precision_i * item
    end
  end

  if exp.nil?
    exp = correct_i
  elsif correct_i > exp
    raise ArgumentError, "Found more relevant items than the provided number of relevant items"
  end
  exp == 0 ? 0 : (ap / exp).to_f
end
|
34
242
|
##
|
35
243
|
# calculate the average precision of the result based on an expected outcome
|
36
244
|
# @param [Array] recommendation a sorted array
|
37
245
|
# @param [Array] expected_outcome an array of items
|
38
246
|
# @return [Float] the average precision
|
39
|
-
def self.average_precision(recommendation,expected_outcome
|
247
|
+
def self.average_precision(recommendation,expected_outcome)
|
248
|
+
raise Error.new "#average_precision has been deprecated, use #ap instead"
|
40
249
|
if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
|
41
250
|
if (expected_outcome.size > 0) & !recommendation.empty?
|
42
251
|
average_precision = 0
|
@@ -45,7 +254,7 @@ module Evoc
|
|
45
254
|
# sort rules by weight
|
46
255
|
# we first group rules with equal weights
|
47
256
|
# and then sort the groups by weight
|
48
|
-
recommendation.
|
257
|
+
recommendation.each do |items|
|
49
258
|
if !items.is_a?(Array) then items = [items] end
|
50
259
|
if items.first.class != expected_outcome.first.class
|
51
260
|
raise ArgumentError, "Expected outcome was of type #{expected_outcome.first.class}, while the item in the recommendation was of type #{items.first.class}"
|
@@ -71,39 +280,5 @@ module Evoc
|
|
71
280
|
nil
|
72
281
|
end
|
73
282
|
end
|
74
|
-
|
75
|
-
# calculate the grouped average precision of the result based on an expected outcome
|
76
|
-
def self.e_collected_average_precision(expected_outcome)
|
77
|
-
if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
|
78
|
-
if (expected_outcome.size > 0) & !self.empty?
|
79
|
-
collected_average_precision = 0
|
80
|
-
correct_items = []
|
81
|
-
total_items_considered = []
|
82
|
-
# sort rules by weight
|
83
|
-
# we first group rules with equal weights
|
84
|
-
# and then sort the groups by weight
|
85
|
-
groups = self.group_by {|r| r.weight}.sort.reverse
|
86
|
-
groups.each do |(_,rules)|
|
87
|
-
items = rules.map(&:rhs).flatten.uniq
|
88
|
-
if (new_items = items - total_items_considered).size > 0
|
89
|
-
new_items.each {|item| total_items_considered << item}
|
90
|
-
if correct_in_group = (items & expected_outcome)
|
91
|
-
if correct_in_group.size > 0
|
92
|
-
# make sure that the new items havent already been added earlier
|
93
|
-
new_correct = (correct_in_group - correct_items)
|
94
|
-
# add new items
|
95
|
-
new_correct.each {|item| correct_items << item}
|
96
|
-
change_in_recall = new_correct.size.to_r/expected_outcome.size
|
97
|
-
precision_at_k = correct_items.size.to_r/total_items_considered.size
|
98
|
-
collected_average_precision += (precision_at_k * change_in_recall)
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
self.collected_average_precision = collected_average_precision.to_f
|
104
|
-
else
|
105
|
-
self.collected_average_precision = nil
|
106
|
-
end
|
107
|
-
end
|
108
283
|
end
|
109
284
|
end
|
data/lib/evoc/experiment.rb
CHANGED
@@ -78,6 +78,23 @@ module Evoc
|
|
78
78
|
# tx_id, query
|
79
79
|
#
|
80
80
|
def generate_queries
|
81
|
+
##
|
82
|
+
# write dict
|
83
|
+
##
|
84
|
+
if path = self.opts[:write_dict]
|
85
|
+
tmp = Tempfile.new('dict')
|
86
|
+
begin
|
87
|
+
tmp.puts("id,name")
|
88
|
+
Evoc::HistoryStore.base_history.int_2_name.each do |id,name|
|
89
|
+
tmp.puts("#{id},#{name}")
|
90
|
+
end
|
91
|
+
tmp.close
|
92
|
+
FileUtils.mv(tmp.path,path)
|
93
|
+
ensure
|
94
|
+
tmp.close
|
95
|
+
tmp.unlink
|
96
|
+
end
|
97
|
+
end
|
81
98
|
##
|
82
99
|
# WRITE CSV HEADER
|
83
100
|
CSV {|row| row << %W(tx_id query)}
|
@@ -124,7 +141,7 @@ module Evoc
|
|
124
141
|
# 2. randomly select X in specified = Y
|
125
142
|
# 3. randomly select Y in tx
|
126
143
|
elsif !random_sizes.empty? & !specified_sizes.empty?
|
127
|
-
specified_sizes.select! {|s| (s < tx_size) & (s >
|
144
|
+
specified_sizes.select! {|s| (s < tx_size) & (s > 0)} #1.
|
128
145
|
if randomly_sampled_size = specified_sizes.sample #2.
|
129
146
|
sampled_queries = [items.sample(randomly_sampled_size)] #3.
|
130
147
|
end
|
@@ -216,27 +233,53 @@ module Evoc
|
|
216
233
|
break
|
217
234
|
end
|
218
235
|
end
|
236
|
+
# get query
|
237
|
+
query_hash = query.to_h
|
238
|
+
# convert query string to array of items
|
239
|
+
query_hash['query'] = query_hash['query'].split(',').map(&:to_i)
|
240
|
+
# verify query before executing
|
241
|
+
if tx = Evoc::HistoryStore.base_history.get_tx(id: query_hash['tx_id'],id_type: :id)
|
242
|
+
if !(query_hash['query'] - tx.items).empty?
|
243
|
+
raise Evoc::Exceptions::ConfigurationError.new "The query generated from #{query_hash['tx_id']} was not a subset of the same tx in the loaded history. The query was: '#{query_hash['query']}', the tx was '#{tx.items}'"
|
244
|
+
end
|
245
|
+
else
|
246
|
+
raise Evoc::Exceptions::ConfigurationError.new "Could not find the tx: '#{query_hash['tx_id']}' from #{self.opts[:queries]} in the history #{self.opts[:transactions]}"
|
247
|
+
end
|
248
|
+
|
219
249
|
current_scenario = 1
|
250
|
+
last_error = 'no errors'
|
220
251
|
# - compact removes nil values (not used factors)
|
221
252
|
# - the splat operator '*' turns the array into parameters for #product
|
222
253
|
# - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
|
223
254
|
factors.first.product(*factors[1..-1]).each do |scenario|
|
224
255
|
# Print progress to stderr
|
225
|
-
STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}
|
256
|
+
STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
|
257
|
+
if invalid_configuration > 0
|
258
|
+
STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
|
259
|
+
else
|
260
|
+
STDERR.print " \r"
|
261
|
+
end
|
226
262
|
|
227
|
-
query_hash = query.to_h
|
228
|
-
# convert query to array
|
229
|
-
query_hash['query'] = query_hash['query'].split(',')
|
230
263
|
params = query_hash.merge(scenario.to_h)
|
231
264
|
params[:case_id] = self.opts[:case_id]
|
232
265
|
params[:granularity] = self.opts[:granularity]
|
233
266
|
# initialize scenario
|
234
|
-
|
267
|
+
s = Evoc::Scenario.new(params)
|
235
268
|
begin
|
236
|
-
|
237
|
-
|
269
|
+
Evoc::RecommendationCache.get_recommendation(algorithm: s.algorithm,
|
270
|
+
query: s.query,
|
271
|
+
model_start: s.model_start,
|
272
|
+
model_end: s.model_end,
|
273
|
+
max_size: s.max_size,
|
274
|
+
aggregator: s.aggregator,
|
275
|
+
measures: s.measures)
|
276
|
+
Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], top_k: self.opts[:top_k], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
|
277
|
+
result = Evoc::RecommendationCache.to_h(measures: s.measures)
|
278
|
+
# merge scenario params with result hash and dump as json
|
279
|
+
$stdout.puts s.to_h.merge(result).to_json
|
238
280
|
rescue ArgumentError => e
|
239
281
|
invalid_configuration += 1
|
282
|
+
last_error = e.message
|
240
283
|
end
|
241
284
|
current_scenario += 1
|
242
285
|
end
|
@@ -9,7 +9,7 @@ module Evoc
|
|
9
9
|
# time: the time it took to generate the currently cached recommendation
|
10
10
|
# model_size: the number of transactions used when generating the currently cached recommendation
|
11
11
|
class << self
|
12
|
-
attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :
|
12
|
+
attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
|
13
13
|
end
|
14
14
|
|
15
15
|
def self.recommendation_cached?(algorithm:,
|
@@ -20,6 +20,7 @@ module Evoc
|
|
20
20
|
return self.tag == [algorithm,query,model_start,model_end,max_size].hash
|
21
21
|
end
|
22
22
|
|
23
|
+
|
23
24
|
def self.get_recommendation(algorithm:,
|
24
25
|
query:,
|
25
26
|
model_start:,
|
@@ -75,14 +76,13 @@ module Evoc
|
|
75
76
|
# @param [Array<String>] measure_combinations the list of measures to use when sorting a recommendation before evaluating
|
76
77
|
#
|
77
78
|
# @return [Hash[aggregator][evaluator][result]] the hash of results
|
78
|
-
def self.
|
79
|
+
def self.evaluate_last(evaluators: ,top_k: nil, unique_consequents: nil,expected_outcome:,measure_combination: )
|
79
80
|
if !self.last_recommendation.nil?
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
self.time_evaluation = TimeDifference.between(t1,t2).in_seconds.round(8)
|
81
|
+
self.evaluation = self.last_recommendation.evaluate_with(evaluators: evaluators,
|
82
|
+
top_k: top_k,
|
83
|
+
unique_consequents: unique_consequents,
|
84
|
+
expected_outcome: expected_outcome,
|
85
|
+
measure_combination: measure_combination)
|
86
86
|
else
|
87
87
|
STDERR.puts "TAG = #{self.tag}No recommendation to evaluate"
|
88
88
|
end
|
@@ -94,10 +94,7 @@ module Evoc
|
|
94
94
|
# time: 'execution time',
|
95
95
|
# filtered_model_size:
|
96
96
|
# number_of_rules :
|
97
|
-
#
|
98
|
-
# average_precision: ..,
|
99
|
-
# ..next evaluator..
|
100
|
-
# }
|
97
|
+
# average_precision:
|
101
98
|
# rules: [
|
102
99
|
# {
|
103
100
|
# lhs: [lhs]
|
@@ -118,7 +115,6 @@ module Evoc
|
|
118
115
|
recommendation_hash[:time_rulegeneration] = self.time_rulegeneration
|
119
116
|
recommendation_hash[:time_measurecalculation] = self.time_measurecalculation
|
120
117
|
recommendation_hash[:time_aggregation] = self.time_aggregation
|
121
|
-
recommendation_hash[:time_evaluation] = self.time_evaluation
|
122
118
|
recommendation_hash[:filtered_model_size] = self.filtered_model_size
|
123
119
|
recommendation_hash[:number_of_baserules] = self.base_recommendation.size
|
124
120
|
recommendation_hash[:number_of_rules] = self.last_recommendation.size
|
@@ -128,10 +124,11 @@ module Evoc
|
|
128
124
|
sum + r.get_measure('m_hyper_coefficient').value } / self.last_recommendation.size
|
129
125
|
recommendation_hash[:largest_antecedent] = self.last_recommendation.largest_antecedent
|
130
126
|
if !self.evaluation.nil?
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
127
|
+
self.evaluation.each do |evaluator,results|
|
128
|
+
recommendation_hash[evaluator] = results['value']
|
129
|
+
# time can also be added like this:
|
130
|
+
# recommendation_hash[evaluator+'_time'] = results['time']
|
131
|
+
end
|
135
132
|
end
|
136
133
|
recommendation_hash[:rules] = []
|
137
134
|
self.last_recommendation.each do |rule|
|
data/lib/evoc/rule.rb
CHANGED
@@ -21,15 +21,19 @@ module Evoc
|
|
21
21
|
name
|
22
22
|
end
|
23
23
|
|
24
|
+
# Human readable form of the rule: the result of human_lhs and human_rhs
# joined by ' -> '.
#
# @return [String]
def human_name
  format('%s -> %s', human_lhs, human_rhs)
end
|
27
|
+
|
24
28
|
# The antecedent with its items translated to names through the tx_store.
# The items are converted with #to_i before the lookup (the former check
# that all items are Numeric is disabled).
#
# @return [String, nil] the names joined by ',', nil when the rule has no
#   tx_store
def human_lhs
  return nil if tx_store.nil?

  item_ids = lhs.map(&:to_i)
  tx_store.ints2names(item_ids).join(',')
end
|
29
33
|
|
30
34
|
# The consequent with its items translated to names through the tx_store.
# The items are converted with #to_i before the lookup (the check that all
# items are Numeric is disabled).
#
# @return [String, nil] the names joined by ',', nil when the rule has no
#   tx_store
def human_rhs
  return nil if tx_store.nil?

  item_ids = rhs.map(&:to_i)
  tx_store.ints2names(item_ids).join(',')
end
|
35
39
|
|
data/lib/evoc/rule_store.rb
CHANGED
@@ -116,6 +116,33 @@ module Evoc
|
|
116
116
|
end
|
117
117
|
|
118
118
|
|
119
|
+
# Needed by Evaluate mixin
#
# Converts the rule store to the clustered format used by the evaluators: a
# list of clusters, each cluster a list of 0s and 1s.
#
# The rules are made unique on the first measure and sorted on the given
# measures. Consecutive rules with the same measure values end up in the
# same cluster. A rule is represented by 1 when all of its consequent items
# are in the expected outcome and by 0 otherwise.
#
# @param [Array] measures the measures to sort and cluster on
# @param [Array] expected_outcome the items that count as correct
# @return [Array<Array<Integer>>] the clustered recommendation
def evaluation_format(measures:, expected_outcome:)
  tagged = []
  # sort and filter out duplicate consequents
  sort_on(measures: measures, rules: unique_by(measures.first)).each do |rule|
    hit = (rule.rhs - expected_outcome).empty? ? 1 : 0
    # rules whose measures are all equal share a tag; a measure without a
    # value is tagged "INF"
    tag = measures.map do |m|
      measure = rule.get_measure(m)
      measure.value.nil? ? "INF" : measure.to_s
    end.join('_')
    tagged << [tag, hit]
  end
  # consecutive entries with the same tag form one cluster
  tagged.chunk { |tag, _hit| tag }.map { |_tag, members| members.map(&:last) }
end
|
145
|
+
|
119
146
|
##
|
120
147
|
# Evaluate this recommendation using the given evaluator
|
121
148
|
#
|
@@ -126,27 +153,35 @@ module Evoc
|
|
126
153
|
# @param [String] evaluator the method to use for evaluating
|
127
154
|
# @param [Array] expected_outcome the list of items to evaluate against
|
128
155
|
# @param [Array] measure_combination the list of measures used to first sort the recommendation
|
129
|
-
def evaluate_with(
|
156
|
+
def evaluate_with(evaluators:,expected_outcome:,measure_combination:,top_k: nil,unique_consequents: nil)
|
130
157
|
if measure_combination.empty? then raise ArgumentError, "Cannot evalute a recommendation without specifying which measures to rank on" end
|
131
|
-
|
132
|
-
logger.debug "#{__method__} params: evaluator: #{evaluator}, measure_combination: #{measure_combination}"
|
158
|
+
logger.debug "#{__method__} params: evaluators: #{evaluators}, measure_combination: #{measure_combination}"
|
133
159
|
# sort the rules on each combination and evaluate
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
# get the strongest unique rules
|
142
|
-
unique_rules = self.unique_by(measure_combination.first)
|
143
|
-
sorted_rules = self.sort_on(rules: unique_rules,measures: measure_combination)
|
144
|
-
end
|
145
|
-
# get the recommended items
|
146
|
-
recommendation = sorted_rules.map(&:rhs)
|
160
|
+
# if !top_k.nil?
|
161
|
+
# raise ArgumentError, "Top K must be a number" unless top_k.is_a?(Numeric)
|
162
|
+
# sorted_rules = sorted_rules.take(top_k)
|
163
|
+
# end
|
164
|
+
# convert rules into format used in evaluation
|
165
|
+
# map to 0/1 list where 1 is a correct item and 0 is not
|
166
|
+
# second item in each tuple gives the weight of the rule
|
147
167
|
# evaluate the sorted list against the expected outcome
|
148
|
-
|
149
|
-
|
168
|
+
recommendation = self.evaluation_format(measures: measure_combination, expected_outcome: expected_outcome)
|
169
|
+
potential_params = {rec: recommendation, exp: expected_outcome.size, rules: self}
|
170
|
+
results = Hash.new
|
171
|
+
evaluators.each do |evaluator|
|
172
|
+
t1 = Time.new
|
173
|
+
if Evoc::Evaluate.respond_to?(evaluator)
|
174
|
+
results[evaluator] = Hash.new
|
175
|
+
method_params = Evoc::Evaluate.method(evaluator).parameters.map(&:second)
|
176
|
+
params = potential_params.select {|k,v| method_params.include?(k)}
|
177
|
+
results[evaluator]['value'] = Evoc::Evaluate.method(evaluator).call(params)
|
178
|
+
else
|
179
|
+
raise NoMethodError, "The evaluator you requested (#{evaluator}) has not been implemented in Evoc::Evaluate"
|
180
|
+
end
|
181
|
+
t2 = Time.new
|
182
|
+
results[evaluator]['time'] = TimeDifference.between(t1,t2).in_seconds.round(8)
|
183
|
+
end
|
184
|
+
return results
|
150
185
|
end
|
151
186
|
|
152
187
|
##
|
@@ -163,9 +198,9 @@ module Evoc
|
|
163
198
|
# where each consequent is the strongest given by the input measure
|
164
199
|
#
|
165
200
|
# @param: [String] measure the measure used to find the strongest rules
|
166
|
-
def unique_by(measure)
|
201
|
+
def unique_by(measure, rules: self)
|
167
202
|
selected_rules = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new
|
168
|
-
|
203
|
+
rules.each do |rule|
|
169
204
|
if !rule.get_measure(measure).value.nil?
|
170
205
|
key = rule.rhs.first
|
171
206
|
if selected_rules[key].nil?
|
@@ -223,7 +258,7 @@ module Evoc
|
|
223
258
|
csv << ['rule'] + defined_measures
|
224
259
|
self.each do |rule|
|
225
260
|
row = CSV::Row.new([],[],false)
|
226
|
-
row << rule.
|
261
|
+
row << rule.human_name
|
227
262
|
defined_measures.each do |m|
|
228
263
|
row << rule.get_measure(m).value
|
229
264
|
end
|
@@ -287,11 +322,9 @@ module Evoc
|
|
287
322
|
end
|
288
323
|
|
289
324
|
def ==other
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
self.map {|r| r.get_measure(m)} == other.map {|r| r.get_measure(m)}
|
294
|
-
end
|
325
|
+
self_rules = self.rules.sort_by {|r| r.name}.map {|r| "#{r.name}#{r.instantiated_measures.map {|m| r.get_measure(m).value}}"}
|
326
|
+
other_rules = other.rules.sort_by {|r| r.name}.map {|r| "#{r.name}#{r.instantiated_measures.map {|m| r.get_measure(m).value}}"}
|
327
|
+
self_rules == other_rules
|
295
328
|
end
|
296
329
|
|
297
330
|
def size
|
data/lib/evoc/scenario.rb
CHANGED
@@ -53,30 +53,6 @@ module Evoc
|
|
53
53
|
comparison
|
54
54
|
end
|
55
55
|
|
56
|
-
##
|
57
|
-
# Executes a query given the current paramaters
|
58
|
-
# This results in a set of association rules, i.e., a recommendation
|
59
|
-
#
|
60
|
-
# Producing a recommendation is done through the following process:
|
61
|
-
#
|
62
|
-
# 1. Generate rules using a mining algorithm on the specified history
|
63
|
-
# 2. Calculate interestingness measures on the generated rules
|
64
|
-
# (optional) 3. Aggregate rules to further improve recommendation
|
65
|
-
# (optional) 4. Evaluate how good the recommendation is
|
66
|
-
#
|
67
|
-
# @return [Hash] containing the query + scenario + recommendation + other metadata
|
68
|
-
def call(evaluators: [])
|
69
|
-
#generate recommendation in cache (generate rules + measures on rules)
|
70
|
-
self.recommendation
|
71
|
-
|
72
|
-
# evaluate if requested
|
73
|
-
if !evaluators.empty?
|
74
|
-
Evoc::RecommendationCache.evaluate(evaluators: evaluators,expected_outcome: self.expected_outcome,measure_combination: self.measures)
|
75
|
-
end
|
76
|
-
# build return hash
|
77
|
-
recommendation = Evoc::RecommendationCache.to_h(measures: self.measures)
|
78
|
-
return self.to_h.merge(recommendation)
|
79
|
-
end
|
80
56
|
|
81
57
|
def to_h
|
82
58
|
fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
|
@@ -88,16 +64,6 @@ module Evoc
|
|
88
64
|
return hash
|
89
65
|
end
|
90
66
|
|
91
|
-
def recommendation
|
92
|
-
Evoc::RecommendationCache.get_recommendation(algorithm: self.algorithm,
|
93
|
-
query: self.query,
|
94
|
-
model_start: self.model_start,
|
95
|
-
model_end: self.model_end,
|
96
|
-
max_size: self.max_size,
|
97
|
-
aggregator: self.aggregator,
|
98
|
-
measures: self.measures)
|
99
|
-
end
|
100
|
-
|
101
67
|
def recommendation?
|
102
68
|
Evoc::RecommendationCache.recommendation_cached?(algorithm: self.algorithm,
|
103
69
|
query: self.query,
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Extending the rubytree gem with some additional methods
|
2
|
+
# see: http://rubytree.anupamsg.me/
|
3
|
+
module Tree
|
4
|
+
class TreeNode
|
5
|
+
|
6
|
+
##
|
7
|
+
# @return the right siblings of the current node
|
8
|
+
def right_siblings
|
9
|
+
if self.is_last_sibling?
|
10
|
+
return []
|
11
|
+
else
|
12
|
+
return [self.next_sibling] + self.next_sibling.right_siblings
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
##
|
17
|
+
# @return the left siblings of the current node
|
18
|
+
def left_siblings
|
19
|
+
if self.is_first_sibling?
|
20
|
+
return []
|
21
|
+
else
|
22
|
+
return [self.previous_sibling] + self.previous_sibling.left_siblings
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/evoc/tx_store.rb
CHANGED
data/lib/evoc/util.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
|
2
|
+
module Evoc
|
3
|
+
module Util
|
4
|
+
# helper function to generate a lattice so we can easily come up with tests for the closed rules mining
|
5
|
+
# examples nodes: [['a',[1,2]],['b',[2,3]],['c',[1,2,3]]]
|
6
|
+
# first elem is item name
|
7
|
+
# second elem is the txes where this item changes
|
8
|
+
def self.lattice(nodes,filter: nil)
|
9
|
+
(1..nodes.size).each do |n|
|
10
|
+
nodes.combination(n).each do |comb|
|
11
|
+
# [['a',[1,2]],['b',[2,3]]]
|
12
|
+
union = comb.map(&:first).join(',')
|
13
|
+
frequency = comb.map(&:second).inject(&:&).size
|
14
|
+
if filter =~ union
|
15
|
+
if frequency > 0
|
16
|
+
printf("%#{nodes.size*2}s",[union,frequency].join(':'))
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
puts
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# helper function for generating a txstore from the following format
|
25
|
+
# [['a',[1,2]],['b',[2,3]],['c',[1,2,3]]]
|
26
|
+
# (same structure as used for lattice creation)
|
27
|
+
def self.nodes2txstore(nodes)
|
28
|
+
txes = nodes.map(&:second).inject(&:|)
|
29
|
+
store = Evoc::TxStore.new
|
30
|
+
txes.each do |id|
|
31
|
+
items = nodes.select {|n| n.second.include?(id)}.map(&:first)
|
32
|
+
store << Evoc::Tx.new(id: id, items: items)
|
33
|
+
end
|
34
|
+
return(store)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/evoc/version.rb
CHANGED
data/lib/evoc_cli/experiment.rb
CHANGED
@@ -1,10 +1,20 @@
|
|
1
1
|
require_relative 'cli_helper'
|
2
|
+
# override printing of help text as the default does not respect spaces and adds newlines
|
3
|
+
class Thor
|
4
|
+
module Shell
|
5
|
+
class Basic
|
6
|
+
def print_wrapped(message, options = {})
|
7
|
+
stdout.puts message
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
2
12
|
|
3
13
|
module EvocCLI
|
4
14
|
class Experiment < Thor
|
5
15
|
class_option :case_id, type: :string, desc: "Specify case identifier."
|
6
16
|
class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
|
7
|
-
class_option :transactions, :aliases => '-t', :type => :string, :
|
17
|
+
class_option :transactions, :aliases => '-t', :type => :string, :desc => "Path to change-history"
|
8
18
|
class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
|
9
19
|
|
10
20
|
##
|
@@ -35,6 +45,7 @@ module EvocCLI
|
|
35
45
|
desc: "Percentage of items to select for each query"
|
36
46
|
method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
|
37
47
|
method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
|
48
|
+
method_option :write_dict, type: :string, desc: "Write an item dictionary to the provided file"
|
38
49
|
desc "generate_queries [options]", "Generate queries from <transactions>"
|
39
50
|
def generate_queries
|
40
51
|
#MemoryProfiler.start('create_queries',30)
|
@@ -59,7 +70,52 @@ module EvocCLI
|
|
59
70
|
desc: "DEPRECATED WILL HAVE NO EFFECT Number of query permutations/replications to produce."
|
60
71
|
method_option :fail_safe, type: :string, desc: "If the fail safe file exists, safely exit."
|
61
72
|
method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
|
73
|
+
method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
|
74
|
+
method_option :top_k, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER an evential unique consequents filter"
|
62
75
|
desc "execute_scenarios [options]",""
|
76
|
+
long_desc <<-LONGDESC
|
77
|
+
keyword description
|
78
|
+
------- -----------
|
79
|
+
|
80
|
+
case_id: user provided tag for the history used
|
81
|
+
granularity: granularity of the history used
|
82
|
+
scenario_id: a unique indentifier for this scenario
|
83
|
+
tx_id: the sha of the commit that the query was sampled from
|
84
|
+
tx_index: the index of this transaction in the used history (0 is oldest)
|
85
|
+
tx_size: the number of items in the transaction
|
86
|
+
query_size: the number of items in the query
|
87
|
+
query_percentage: query_size/tx_size
|
88
|
+
expected_outcome_size: tx - query
|
89
|
+
model_size: number of previous transactions relative to this one
|
90
|
+
model_hours: time span from the first transaction to this one
|
91
|
+
model_age: number of transactions between end of model and this transaction
|
92
|
+
max_size: transactions larger than this are filtered out before generating rules
|
93
|
+
filtered_model_size: model size after the max_size filtering
|
94
|
+
algorithm: the mining algorithm used to generate the recommendation
|
95
|
+
aggregator: the aggregation function used to aggregate the rules of the recommendation
|
96
|
+
measures: the interestingnessmeasures used to rank each rule
|
97
|
+
recommendation_tag: a unique identifiter of the rules used as a basis for the recommendation
|
98
|
+
time_rulegeneration: how long it took to generate the rules
|
99
|
+
time_measurecalculation: how long it took to calculate the measures for each rule
|
100
|
+
time_aggregation: how long it took to aggregate the rules
|
101
|
+
number_of_baserules: number of rules before aggregation
|
102
|
+
number_of_rules: number of rules after aggregation (equal to number_of_baserules when not aggregating)
|
103
|
+
number_of_hyperrules: number of hyper rules after aggregating
|
104
|
+
mean_hyper_coefficient: average number of rules aggregated in each hyper rule
|
105
|
+
largest_antecedent: number of items in the largest antecedent (lhs of rule)
|
106
|
+
t_ap: average precision where ties are accounted for
|
107
|
+
ap: the average precision
|
108
|
+
precision: ratio of correct to incorrect items
|
109
|
+
precision10: ratio of correct to incorrect items in the top 10
|
110
|
+
recall: ratio of correct items in recommendation to full set of expected items
|
111
|
+
recall19: ratio of correct items in recommendation to full set of expected items in the top 10
|
112
|
+
mean_confidence: the average confidence of the rules in this recommendation
|
113
|
+
discernibility: the number of uniquely weighted rules to the number of rules
|
114
|
+
applicable: 1 if rules were generated, 0 otherwise
|
115
|
+
f1: the f1 measure
|
116
|
+
first_relevant: the rank of the first correct item
|
117
|
+
last_relevant: the rank of the last correct item
|
118
|
+
LONGDESC
|
63
119
|
def execute_scenarios
|
64
120
|
if !options[:permutation].nil?
|
65
121
|
STDERR.puts "Permutation option has been set, but the option is currently disabled and will have no effect"
|
data/lib/evoc_helper.rb
CHANGED
@@ -28,7 +28,10 @@ require 'logger'
|
|
28
28
|
require 'zip'
|
29
29
|
require 'zip/filesystem'
|
30
30
|
require 'set'
|
31
|
+
require 'tempfile'
|
32
|
+
require 'fileutils'
|
31
33
|
require 'algorithms' # various efficient data structures
|
34
|
+
require 'tree' #general purpose tree structure
|
32
35
|
require 'mathn' # enhances the Rational (and others) number type
|
33
36
|
Evoc::Env.load('google_hash',"please install to improve performance")
|
34
37
|
#Evoc::Env.load('nmatrix')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: evoc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Rolfsnes
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-01-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -150,6 +150,20 @@ dependencies:
|
|
150
150
|
- - ">="
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: rubytree
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">="
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
153
167
|
description:
|
154
168
|
email:
|
155
169
|
- mail@thomasrolfsnes.com
|
@@ -172,12 +186,15 @@ files:
|
|
172
186
|
- evoc.gemspec
|
173
187
|
- lib/evoc.rb
|
174
188
|
- lib/evoc/algorithm.rb
|
189
|
+
- lib/evoc/algorithms/closed_rules.rb
|
175
190
|
- lib/evoc/algorithms/top_k.rb
|
176
191
|
- lib/evoc/analyze.rb
|
177
192
|
- lib/evoc/array.rb
|
178
193
|
- lib/evoc/evaluate.rb
|
179
194
|
- lib/evoc/exceptions/aggregation_error.rb
|
195
|
+
- lib/evoc/exceptions/configuration_error.rb
|
180
196
|
- lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb
|
197
|
+
- lib/evoc/exceptions/format_error.rb
|
181
198
|
- lib/evoc/exceptions/measure_calculation_error.rb
|
182
199
|
- lib/evoc/exceptions/no_changed_items_in_changes.rb
|
183
200
|
- lib/evoc/exceptions/no_changes_in_json_object.rb
|
@@ -206,8 +223,10 @@ files:
|
|
206
223
|
- lib/evoc/rule_store.rb
|
207
224
|
- lib/evoc/scenario.rb
|
208
225
|
- lib/evoc/svd.rb
|
226
|
+
- lib/evoc/tree/tree_node.rb
|
209
227
|
- lib/evoc/tx.rb
|
210
228
|
- lib/evoc/tx_store.rb
|
229
|
+
- lib/evoc/util.rb
|
211
230
|
- lib/evoc/version.rb
|
212
231
|
- lib/evoc_cli/analyze.rb
|
213
232
|
- lib/evoc_cli/cli_helper.rb
|