evoc 3.6.2 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8945b15a68ce28d5c3c9e8b10b2478f3fcbb139f
4
- data.tar.gz: 597aaf2639f69c61788312c030c76706e419b0bf
3
+ metadata.gz: 2b867c7a5e05b3c2be58b9dd361554a43f3f277c
4
+ data.tar.gz: 9d00091a6fd7685f048930889aaf252df5aa6634
5
5
  SHA512:
6
- metadata.gz: 804e7b6ec7b27c35b09697a44f5f20895efe94a31631c8068ba83861d69856e948820056b22571b10bd47ce09a2104dd73f21cd18df0bf49619df23813425d61
7
- data.tar.gz: 9ad3251c853afb583f0f257cec83cb09959aab339140cea5a7c032d48ff7e023a80f8239ae6b269d0357aafa0e8757c6912f6814fe8b85e51e285dd85e83e44d
6
+ metadata.gz: f89dbef20f735e0f8c6b8f7104ea8b38a4dd118707089b17acdf529591fd652478406047cb5d4b08e18afb746d3edf46dc7cf9ac7eb208d214f91e9050841c12
7
+ data.tar.gz: 7e1955af5653df5d7afd986e9d522178b32d10c26771833e4d37aec910dd93208de56c8a6f83bd21faeb8796b098da808f72a05c9f6e3caf1100064fd20d7bb0
data/.gitignore CHANGED
@@ -13,3 +13,5 @@ evoc.log
13
13
  /.history
14
14
  /GRTAGS
15
15
  *TAGS
16
+ *~
17
+ *#
data/evoc.gemspec CHANGED
@@ -27,4 +27,5 @@ Gem::Specification.new do |spec|
27
27
  spec.add_runtime_dependency "ruby-progressbar"
28
28
  spec.add_runtime_dependency "rubyzip"
29
29
  spec.add_runtime_dependency "algorithms"
30
+ spec.add_runtime_dependency "rubytree"
30
31
  end
@@ -20,8 +20,8 @@ module Evoc
20
20
  Evoc::Algorithm.cached_rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
21
21
  elsif match = /rule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
22
22
  Evoc::Algorithm.rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
23
- elsif Evoc::Algorithm.respond_to?(algorithm+'_algorithm')
24
- Evoc::Algorithm.method(algorithm+'_algorithm').call(tx_store:tx_store,query:query)
23
+ elsif Evoc::Algorithm.respond_to?(algorithm)
24
+ Evoc::Algorithm.method(algorithm).call(tx_store:tx_store,query:query)
25
25
  else raise ArgumentError.new, "#{algorithm} is not an available algorithm"
26
26
  end
27
27
  end
@@ -105,6 +105,70 @@ module Evoc
105
105
  end
106
106
 
107
107
 
108
+ def self.not_subsumed(tx_store:, query:)
109
+ #initial filter, we consider all txes where something in the query changed
110
+ query_changed_in = tx_store.transactions_of_list(query)
111
+ # now find what subsets of the query changed in each tx
112
+ trie = Containers::Trie.new
113
+ query_changed_in.each do |tx_id|
114
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
115
+ antecedent = (query & tx.items)
116
+ consequents = (tx.items - antecedent)
117
+ if consequents.size != 0
118
+ consequents.each do |consequent|
119
+ entry = "#{consequent.to_s}#{antecedent.join('')}"
120
+ if trie.get(entry).nil?
121
+ puts "ADDED #{entry}"
122
+ trie.push(entry,consequent.to_s)
123
+ end
124
+ end
125
+ end
126
+ end
127
+ return trie
128
+ # now generate rules
129
+ # rule_store = Evoc::RuleStore.new(query: query)
130
+ # rules.each do |consequent,antecedents|
131
+ # antecedents.each do |antecedent|
132
+ # rule_store << Evoc::Rule.new(lhs: antecedent,rhs: consequent,tx_store:tx_store)
133
+ # end
134
+ # end
135
+ # return rule_store
136
+ end
137
+
138
+ ##
139
+ # Find the largest rules for each unique consequent
140
+ def self.largest_rules(tx_store:,query:)
141
+ #initial filter, we consider all txes where something in the query changed
142
+ query_changed_in = tx_store.transactions_of_list(query)
143
+ # now find what subsets of the query changed in each tx
144
+ rules = Hash.new
145
+ query_changed_in.each do |tx_id|
146
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
147
+ antecedent = (query & tx.items)
148
+ consequents = (tx.items - antecedent)
149
+ if consequents.size != 0
150
+ consequents.each do |consequent|
151
+ if rules[consequent].nil?
152
+ rules[consequent] = Set.new([antecedent]) # new consequent
153
+ elsif antecedent.size > rules[consequent].first.size # larger antecedent
154
+ rules[consequent] = Set.new([antecedent])
155
+ elsif antecedent.size == rules[consequent].first.size # equally large antecedent
156
+ rules[consequent] << antecedent
157
+ end
158
+ end
159
+ end
160
+ end
161
+ # now generate rules
162
+ rule_store = Evoc::RuleStore.new(query: query)
163
+ rules.each do |consequent,antecedents|
164
+ antecedents.each do |antecedent|
165
+ rule_store << Evoc::Rule.new(lhs: antecedent,rhs: consequent,tx_store:tx_store)
166
+ end
167
+ end
168
+ return rule_store
169
+ end
170
+
171
+
108
172
  ##
109
173
  # TARMAQ
110
174
  # find largest subsets in @query with evidence in @tx_store version
@@ -113,7 +177,6 @@ module Evoc
113
177
  #initial filter, we consider all txes where something in the query changed
114
178
  query_changed_in = tx_store.transactions_of_list(query)
115
179
  # now find what subsets of the query changed in each tx
116
- rules = Hash.new
117
180
  query_changed_in.each do |tx_id|
118
181
  tx = tx_store.get_tx(id:tx_id,id_type: :index)
119
182
  largest_match_in_query = (query & tx.items)
@@ -134,14 +197,18 @@ module Evoc
134
197
  ###
135
198
  ## rose
136
199
  ###
137
- def self.rose_algorithm(tx_store:,query:)
200
+ def self.rose(tx_store:,query:)
138
201
  qs = query.size
139
202
  self.cached_rule_range(qs,qs,tx_store: tx_store, query: query)
140
203
  end
141
204
 
142
- def self.co_change_algorithm(tx_store:, query:)
205
+ def self.co_change(tx_store:, query:)
143
206
  self.cached_rule_range(1,1,tx_store: tx_store, query: query)
144
207
  end
145
208
 
209
+ def self.closed_rules(tx_store:, query:)
210
+ Evoc::ClosedRules.closed_rules(tx_store: tx_store,query: query)
211
+ end
212
+
146
213
  end # Algorithm
147
214
  end
@@ -0,0 +1,145 @@
1
+ module Evoc
2
+ class ClosedRules
3
+ def self.closed_rules(tx_store:,query:)
4
+ # @@store = tx_store
5
+ # create initial trees, one tree per consequent
6
+ tree = self.initialize_tree(tx_store,query)
7
+ # puts "INIT TREE:"
8
+ # tree.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}"})
9
+ closed_rules = Evoc::RuleStore.new(query: query)
10
+ tree.children.each do |consequent|
11
+ self.extend_nodes(consequent).each do |frequency, closed_sets|
12
+ closed_sets.each do |closed_set|
13
+ antecedent = closed_set - consequent.name
14
+ closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
15
+ end
16
+ end
17
+ end
18
+ return closed_rules
19
+ end
20
+
21
+ private
22
+ def self.initialize_tree(tx_store, query)
23
+ tree = Tree::TreeNode.new([])
24
+ # find all items that changed with something in the query
25
+ query_changed_in = tx_store.transactions_of_list(query)
26
+ # store all items from the query that have changed with each consequent
27
+ query_changed_in.each do |tx_id|
28
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
29
+ antecedent = (query & tx.items)
30
+ consequents = (tx.items - antecedent)
31
+ if consequents.size != 0
32
+ consequents.each do |consequent|
33
+ consequent_key = [consequent.to_s]
34
+ if tree[consequent_key].nil?
35
+ # initialize candidates
36
+ tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
37
+ end
38
+ txes_consequent = tree[consequent_key].content
39
+ antecedent.each do |item|
40
+ union = [item.to_s,consequent.to_s]
41
+ if tree[consequent_key][union].nil?
42
+ txes_union = tx_store.transactions_of(item) & txes_consequent
43
+ tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
49
+ return(tree)
50
+ end
51
+
52
+ def self.extend_nodes(root,closed_rules: {})
53
+ current_node = root.first_child
54
+ while(!current_node.nil?) do
55
+ a = current_node
56
+ b = a.next_sibling
57
+ while(!b.nil?) do
58
+ # print "Checking #{@@store.ints2names(a.name.map(&:to_i))}:{#{a.content}} against #{@@store.ints2names(b.name.map(&:to_i))}:{#{b.content}}"
59
+ ab = a.name | b.name
60
+ a_txes = a.content
61
+ b_txes = b.content
62
+ ab_txes = a_txes & b_txes
63
+ # check properties
64
+ # 1. when txes are the same
65
+ # - remove B
66
+ # - replace all A with union of A and B
67
+ if ab_txes.size > 0
68
+ case self.compare(a_txes,b_txes)
69
+ when 'EQUAL'
70
+ # puts " EQUAL"
71
+ # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
72
+ # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
73
+ temp = b.previous_sibling
74
+ root.remove!(b)
75
+ b = temp
76
+ a.each {|n| n.rename(ab | n.name)}
77
+ when 'A_IN_B'
78
+ # puts " A in B"
79
+ # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
80
+ a.each {|n| n.rename(ab | n.name)}
81
+ when 'B_IN_A'
82
+ # puts " B in A"
83
+ # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
84
+ # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
85
+ temp = b.previous_sibling
86
+ root.remove!(b)
87
+ b = temp
88
+ a << Tree::TreeNode.new(ab,ab_txes)
89
+ when 'NOT_EQUAL'
90
+ # puts " NOT EQUAL"
91
+ # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
92
+ a << Tree::TreeNode.new(ab,ab_txes)
93
+ end
94
+ end
95
+ # puts "NEW TREE:"
96
+ # root.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}:#{node.content.size}"})
97
+ b = b.next_sibling
98
+ # puts "A siblings #{a.right_siblings.map(&:name).map {|n| @@store.ints2names(n.map(&:to_i))}}"
99
+ # puts "A next sibling #{@@store.ints2names(a.next_sibling.name.map(&:to_i))}}"
100
+ # puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
101
+ end # siblings.each
102
+ if !a.children.empty?
103
+ # puts "TRAVERSING DOWN"
104
+ self.extend_nodes(a, closed_rules: closed_rules)
105
+ end
106
+ # add node as closed rule if not subsumed by another rule already added
107
+ rule_frequency = a.content.size
108
+ rule = a.name
109
+ if closed_rules[rule_frequency].nil?
110
+ # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
111
+ closed_rules[rule_frequency] = [rule]
112
+ else
113
+ if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
114
+ # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
115
+ closed_rules[rule_frequency] << rule
116
+ else
117
+ # puts "RULE SUBSUMED, NOT ADDING: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
118
+ end
119
+ end
120
+ current_node = current_node.next_sibling
121
+ end # children.each
122
+ return(closed_rules)
123
+ end #extend_nodes
124
+
125
+ def self.compare(a,b)
126
+ if a == b
127
+ return 'EQUAL'
128
+ # 2. when A is a subset of B
129
+ # - replace all A with union of A and B
130
+ elsif (a - b).empty?
131
+ return 'A_IN_B'
132
+ # 2. when B is a subset of A
133
+ # - remove B
134
+ # - add the union as new child
135
+ elsif (b - a).empty?
136
+ return 'B_IN_A'
137
+ # 4. contain different elements
138
+ # - add the union as new child
139
+ else
140
+ return 'NOT_EQUAL'
141
+ end
142
+ end
143
+ end
144
+ end
145
+
data/lib/evoc/evaluate.rb CHANGED
@@ -1,42 +1,251 @@
1
1
  module Evoc
2
- class Evaluate
2
+ module Evaluate
3
3
  extend Logging
4
4
 
5
# Verify that +input+ has the clustered-recommendation format used by the
# evaluators in this module: a non-empty Array of Arrays whose items are
# all 0 or 1.
#
# Every cluster and every item is checked, not just the first item of the
# first cluster, so that malformed data further down the list is reported
# here instead of silently skewing a metric. The first cluster must be
# non-empty; later clusters may be empty.
#
# @param input [Array<Array<Integer>>] the structure to validate
# @raise [Evoc::Exceptions::FormatError] if +input+ is not in the expected format
# @return [nil]
def self.validateInput(input)
  binary = [0, 1]
  well_formed = input.is_a?(Array) &&                  # outer list
                input.first.is_a?(Array) &&            # at least one cluster
                binary.include?(input.first.first) &&  # first cluster is non-empty
                input.all? { |cluster| cluster.is_a?(Array) && cluster.all? { |item| binary.include?(item) } }
  return if well_formed

  raise Evoc::Exceptions::FormatError.new "Wrong format given to #{__method__}, expected list of list of 0s and 1s, input was: #{input}"
end
5
13
 
6
- def self.execute(recommendation,expected_outcome,evaluator)
7
- if match = /average_precision(?<num>\d+)?/.match(evaluator)
8
- if match[:num].nil?
9
- self.average_precision(recommendation,expected_outcome)
10
- else
11
- self.average_precision(recommendation,expected_outcome,n: match[:num].to_i)
14
# Arithmetic mean of the confidence of the given rules.
#
# Each rule must respond to +m_confidence+, whose result responds to
# +value+ (presumably a numeric confidence -- confirm against Evoc::Rule).
# The division is done with +fdiv+ so that integer-valued confidences are
# not truncated before the conversion to Float.
#
# @param rules [Enumerable] the rules to average over (must respond to
#   +empty?+, +inject+ and +size+)
# @return [Float, nil] the mean confidence, or nil when +rules+ is empty
def self.mean_confidence(rules:)
  return nil if rules.empty?

  total = rules.inject(0) { |sum, rule| sum + rule.m_confidence.value }
  total.fdiv(rules.size)
end
18
+
19
# Ratio of clusters to items in a clustered recommendation.
#
# The result is 1.0 when every item sits in its own cluster and gets
# smaller the more items share a cluster.
#
# The ratio is computed with Rational arithmetic; dividing the two Integer
# counts directly would truncate every value below 1 to 0.
#
# @param rec [Array<Array<Integer>>] clustered recommendation of 0/1 items
# @raise [Evoc::Exceptions::FormatError] via validateInput on malformed input
# @return [Float, nil] clusters divided by items, or nil for an empty Array
def self.discernibility(rec:)
  # nothing to measure for the empty recommendation
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  cluster_count = rec.size
  item_count = rec.inject(0) { |sum, cluster| sum + cluster.size }
  (cluster_count.to_r / item_count).to_f
end
18
38
 
19
- # calculate the ratio of correct items in the top 10
20
- # @param [Array] recommendation a sorted array
21
- # @param [Array] expected_outcome an array of items
22
- # @return [Rational] the top10 recall
23
- def self.top10_recall(recommendation,expected_outcome)
24
- if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
25
- if (expected_outcome.size > 0) & !recommendation.empty?
26
- top10 = recommendation.take(10).flatten
27
- common_items = (expected_outcome & top10).size.to_r
28
- return common_items/expected_outcome.size
29
- else
30
- nil
39
# Whether a recommendation contains anything at all, encoded as 0 or 1.
#
# @param rec [Array] the recommendation
# @raise [Evoc::Exceptions::FormatError] if +rec+ is not an Array
# @return [Integer] 0 for the empty Array, 1 otherwise
def self.applicable(rec:)
  unless rec.is_a?(Array)
    # the message must interpolate +rec+; the previously referenced +input+
    # is not defined in this method and raised NameError instead
    raise Evoc::Exceptions::FormatError.new "Wrong format given to #{__method__}, expected an array, input was: #{rec}"
  end

  rec.empty? ? 0 : 1
end
46
+
47
##
# F1 score (harmonic mean of precision and recall) of a clustered
# recommendation, computed as 2 * correct / (recommended + expected).
#
# Rational arithmetic is used for the division; plain Integer division
# would truncate the score to 0 or 1.
#
# @param rec [Array<Array<Integer>>] clustered recommendation of 0/1 items,
#   where 1 marks a correct item
# @param exp [Integer] the number of expected (relevant) items
# @raise [Evoc::Exceptions::FormatError] via validateInput on malformed input
# @return [Float, nil] the f1 score, or nil for an empty Array
def self.f1(rec:,exp:)
  # nothing to score for the empty recommendation
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  items = rec.flatten(1)
  rec_size = items.size
  rec_correct = items.inject(0, :+)
  ((2 * rec_correct).to_r / (rec_size + exp)).to_f
end
67
+
68
+
69
+ ##
70
+ # @return the rank of the first relevant item
71
+ def self.first_relevant(rec:)
72
+ # AP is 0 for the empty list
73
+ if rec.is_a?(Array) && rec.empty? # array and empty
74
+ return nil
75
+ end
76
+ self.validateInput(rec)
77
+
78
+ last_checked = 1
79
+ rec.each do |c|
80
+ c.each do |e|
81
+ if e == 1
82
+ return last_checked
83
+ end
84
+ last_checked = last_checked + 1
31
85
  end
86
+ end
87
+ return nil
32
88
  end
33
89
 
90
+ ##
91
+ # @return the rank of the last relevant item
92
+ def self.last_relevant(rec:)
93
+ # AP is 0 for the empty list
94
+ if rec.is_a?(Array) && rec.empty? # array and empty
95
+ return nil
96
+ end
97
+ self.validateInput(rec)
98
+
99
+ size = rec.inject(0) {|sum,c| sum + c.size}
100
+ last_checked = size
101
+ rec.reverse_each do |c|
102
+ c.reverse_each do |e|
103
+ if e == 1
104
+ return last_checked
105
+ end
106
+ last_checked = last_checked - 1
107
+ end
108
+ end
109
+ return nil
110
+ end
111
+
112
+ def self.recall10(rec:,exp: nil)
113
+ # AP is 0 for the empty list
114
+ if rec.is_a?(Array) && rec.empty? # array and empty
115
+ return nil
116
+ end
117
+ self.validateInput(rec)
118
+ return self.recall(rec: [rec.take(10).flatten.take(10)],exp: exp)
119
+ end
120
+
121
+ def self.precision10(rec:,exp: nil)
122
+ # AP is 0 for the empty list
123
+ if rec.is_a?(Array) && rec.empty? # array and empty
124
+ return nil
125
+ end
126
+ self.validateInput(rec)
127
+ return self.precision(rec: [rec.take(10).flatten.take(10)])
128
+ end
129
+
130
# Precision of a clustered recommendation: the fraction of recommended
# items that are correct.
#
# Rational arithmetic is used for the division (Integer division would
# truncate every value below 1 to 0), and empty clusters count as zero
# correct items instead of raising a TypeError.
#
# @param rec [Array<Array<Integer>>] clustered recommendation of 0/1 items,
#   where 1 marks a correct item
# @param exp [Object] unused; accepted so all evaluators share a signature
# @raise [Evoc::Exceptions::FormatError] via validateInput on malformed input
# @return [Float, nil] the precision, or nil for an empty Array
def self.precision(rec:,exp: nil)
  # nothing to score for the empty recommendation
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  items = rec.flatten(1)
  num_correct_in_rec = items.inject(0, :+)
  (num_correct_in_rec.to_r / items.size).to_f
end
142
+
143
# Recall of a clustered recommendation: the fraction of the expected items
# that were recommended.
#
# Rational arithmetic is used for the division (Integer division would
# truncate every value below 1 to 0), and empty clusters count as zero
# correct items instead of raising a TypeError.
#
# @param rec [Array<Array<Integer>>] clustered recommendation of 0/1 items,
#   where 1 marks a correct item
# @param exp [Integer, nil] the number of expected (relevant) items
# @raise [Evoc::Exceptions::FormatError] via validateInput on malformed input
# @raise [ArgumentError] if +rec+ holds more correct items than +exp+
# @return [Float, Integer, nil] nil for an empty Array; the plain count of
#   correct items when +exp+ is nil; otherwise correct items divided by +exp+
def self.recall(rec:,exp: nil)
  # nothing to score for the empty recommendation
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  num_correct_in_rec = rec.flatten(1).inject(0, :+)
  # without a known number of relevant items only the raw count can be reported
  return num_correct_in_rec if exp.nil?

  if num_correct_in_rec > exp
    raise ArgumentError, "Found more relevant items than the provided number of relevant items"
  end
  (num_correct_in_rec.to_r / exp).to_f
end
161
# Average precision for a recommendation with tied (equally weighted) items.
#
# The clustered recommendation is expected to be a sorted list
# V = [V1,V2,..,Vn], where each Vi is a cluster of items with the same
# weight, written as [rel_1,rel_2,..,rel_n] with rel_i = 1 if the item is
# relevant and 0 if not.
#
# Names used in the computation:
#   r_p : relevant items in previous clusters
#   i_p : number of items in previous clusters
#   r_g : relevant items in the current cluster
#   n_g : items in the current cluster
#   i   : 1-based rank of the current item
#
# The factor 1/i is built with Rational: with Integer operands it would be
# 0 for every rank above 1, dropping the contribution of those ranks in the
# first cluster.
#
# @param rec [Array<Array<Integer>>] clustered recommendation of 0/1 items
# @param exp [Integer, nil] the number of relevant items; when nil the
#   recommendation is assumed to contain all relevant items
# @raise [Evoc::Exceptions::FormatError] via validateInput on malformed input
# @raise [ArgumentError] if +rec+ holds more relevant items than +exp+
# @return [Float, Integer, nil] nil for an empty Array, 0 when +rec+ holds
#   no relevant item, otherwise the average precision
def self.t_ap(rec:,exp: nil)
  # nothing to score for the empty recommendation
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  ap_sum = 0
  r_p = 0
  i_p = 0
  rec.each do |cluster|
    r_g = cluster.inject(0, :+).to_r
    n_g = cluster.size.to_r
    cluster.each_index do |offset|
      i = i_p + offset + 1
      chance_relevant = r_g / n_g
      # relevant items expected among the +offset+ cluster members ranked
      # before this one, given that this one is relevant
      relevant_before_in_cluster = n_g == 1 ? 0 : offset * ((r_g - 1) / (n_g - 1))
      avg_previous_rel = (r_p + relevant_before_in_cluster + 1) * Rational(1, i)

      ap_sum += chance_relevant * avg_previous_rel
    end
    r_p += r_g
    i_p += cluster.size
  end

  # if the number of relevant documents is not supplied
  # assume that the recommendation contains all relevant documents
  if exp.nil?
    exp = r_p
  elsif r_p > exp
    raise ArgumentError, "Found more relevant items than the provided number of relevant items"
  end
  r_p == 0 ? 0 : (ap_sum / exp).to_f
end
211
+
212
# Average precision of a clustered recommendation, reading the clusters in
# order and ignoring ties (items are ranked in the order they appear).
#
# The precision at each rank is kept as a Rational; with Integer division
# it would be floored to 0 at every rank that is not perfectly precise.
#
# @param rec [Array<Array<Integer>>] clustered recommendation of 0/1 items,
#   where 1 marks a correct item
# @param exp [Integer, nil] the number of relevant items; when nil the
#   number of correct items found in +rec+ is used
# @raise [Evoc::Exceptions::FormatError] via validateInput on malformed input
# @raise [ArgumentError] if +rec+ holds more correct items than +exp+
# @return [Float, Integer, nil] nil for an empty Array, 0 when the number of
#   relevant items is 0, otherwise the average precision
def self.ap(rec:,exp: nil)
  # nothing to score for the empty recommendation
  return nil if rec.is_a?(Array) && rec.empty?

  self.validateInput(rec)

  rank = 0
  correct_so_far = 0
  precision_sum = 0

  rec.each do |cluster|
    cluster.each do |item|
      rank += 1
      correct_so_far += item
      # only ranks holding a correct item (item == 1) contribute
      precision_sum += Rational(correct_so_far, rank) * item
    end
  end

  if exp.nil?
    exp = correct_so_far
  elsif correct_so_far > exp
    raise ArgumentError, "Found more relevant items than the provided number of relevant items"
  end
  exp == 0 ? 0 : (precision_sum / exp).to_f
end
34
242
  ##
35
243
  # calculate the average precision of the result based on an expected outcome
36
244
  # @param [Array] recommendation a sorted array
37
245
  # @param [Array] expected_outcome an array of items
38
246
  # @return [Float] the average precision
39
- def self.average_precision(recommendation,expected_outcome, n: recommendation.size)
247
+ def self.average_precision(recommendation,expected_outcome)
248
+ raise Error.new "#average_precision has been deprecated, use #ap instead"
40
249
  if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
41
250
  if (expected_outcome.size > 0) & !recommendation.empty?
42
251
  average_precision = 0
@@ -45,7 +254,7 @@ module Evoc
45
254
  # sort rules by weight
46
255
  # we first group rules with equal weights
47
256
  # and then sort the groups by weight
48
- recommendation.take(n).each do |items|
257
+ recommendation.each do |items|
49
258
  if !items.is_a?(Array) then items = [items] end
50
259
  if items.first.class != expected_outcome.first.class
51
260
  raise ArgumentError, "Expected outcome was of type #{expected_outcome.first.class}, while the item in the recommendation was of type #{items.first.class}"
@@ -71,39 +280,5 @@ module Evoc
71
280
  nil
72
281
  end
73
282
  end
74
-
75
- # calculate the grouped average precision of the result based on an expected outcome
76
- def self.e_collected_average_precision(expected_outcome)
77
- if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
78
- if (expected_outcome.size > 0) & !self.empty?
79
- collected_average_precision = 0
80
- correct_items = []
81
- total_items_considered = []
82
- # sort rules by weight
83
- # we first group rules with equal weights
84
- # and then sort the groups by weight
85
- groups = self.group_by {|r| r.weight}.sort.reverse
86
- groups.each do |(_,rules)|
87
- items = rules.map(&:rhs).flatten.uniq
88
- if (new_items = items - total_items_considered).size > 0
89
- new_items.each {|item| total_items_considered << item}
90
- if correct_in_group = (items & expected_outcome)
91
- if correct_in_group.size > 0
92
- # make sure that the new items havent already been added earlier
93
- new_correct = (correct_in_group - correct_items)
94
- # add new items
95
- new_correct.each {|item| correct_items << item}
96
- change_in_recall = new_correct.size.to_r/expected_outcome.size
97
- precision_at_k = correct_items.size.to_r/total_items_considered.size
98
- collected_average_precision += (precision_at_k * change_in_recall)
99
- end
100
- end
101
- end
102
- end
103
- self.collected_average_precision = collected_average_precision.to_f
104
- else
105
- self.collected_average_precision = nil
106
- end
107
- end
108
283
  end
109
284
  end
@@ -0,0 +1,6 @@
1
+ module Evoc
2
+ module Exceptions
3
+ class ConfigurationError < StandardError
4
+ end
5
+ end
6
+ end
@@ -0,0 +1,6 @@
1
+ module Evoc
2
+ module Exceptions
3
+ class FormatError < StandardError
4
+ end
5
+ end
6
+ end
@@ -78,6 +78,23 @@ module Evoc
78
78
  # tx_id, query
79
79
  #
80
80
  def generate_queries
81
+ ##
82
+ # write dict
83
+ ##
84
+ if path = self.opts[:write_dict]
85
+ tmp = Tempfile.new('dict')
86
+ begin
87
+ tmp.puts("id,name")
88
+ Evoc::HistoryStore.base_history.int_2_name.each do |id,name|
89
+ tmp.puts("#{id},#{name}")
90
+ end
91
+ tmp.close
92
+ FileUtils.mv(tmp.path,path)
93
+ ensure
94
+ tmp.close
95
+ tmp.unlink
96
+ end
97
+ end
81
98
  ##
82
99
  # WRITE CSV HEADER
83
100
  CSV {|row| row << %W(tx_id query)}
@@ -124,7 +141,7 @@ module Evoc
124
141
  # 2. randomly select X in specified = Y
125
142
  # 3. randomly select Y in tx
126
143
  elsif !random_sizes.empty? & !specified_sizes.empty?
127
- specified_sizes.select! {|s| (s < tx_size) & (s > 1)} #1.
144
+ specified_sizes.select! {|s| (s < tx_size) & (s > 0)} #1.
128
145
  if randomly_sampled_size = specified_sizes.sample #2.
129
146
  sampled_queries = [items.sample(randomly_sampled_size)] #3.
130
147
  end
@@ -216,27 +233,53 @@ module Evoc
216
233
  break
217
234
  end
218
235
  end
236
+ # get query
237
+ query_hash = query.to_h
238
+ # convert query string to array of items
239
+ query_hash['query'] = query_hash['query'].split(',').map(&:to_i)
240
+ # verify query before executing
241
+ if tx = Evoc::HistoryStore.base_history.get_tx(id: query_hash['tx_id'],id_type: :id)
242
+ if !(query_hash['query'] - tx.items).empty?
243
+ raise Evoc::Exceptions::ConfigurationError.new "The query generated from #{query_hash['tx_id']} was not a subset of the same tx in the loaded history. The query was: '#{query_hash['query']}', the tx was '#{tx.items}'"
244
+ end
245
+ else
246
+ raise Evoc::Exceptions::ConfigurationError.new "Could not find the tx: '#{query_hash['tx_id']}' from #{self.opts[:queries]} in the history #{self.opts[:transactions]}"
247
+ end
248
+
219
249
  current_scenario = 1
250
+ last_error = 'no errors'
220
251
  # - compact removes nil values (not used factors)
221
252
  # - the splat operator '*' turns the array into parameters for #product
222
253
  # - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
223
254
  factors.first.product(*factors[1..-1]).each do |scenario|
224
255
  # Print progress to stderr
225
- STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines} (scenarios skipped: #{invalid_configuration}) \r"
256
+ STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
257
+ if invalid_configuration > 0
258
+ STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
259
+ else
260
+ STDERR.print " \r"
261
+ end
226
262
 
227
- query_hash = query.to_h
228
- # convert query to array
229
- query_hash['query'] = query_hash['query'].split(',')
230
263
  params = query_hash.merge(scenario.to_h)
231
264
  params[:case_id] = self.opts[:case_id]
232
265
  params[:granularity] = self.opts[:granularity]
233
266
  # initialize scenario
234
- q = Evoc::Scenario.new(params)
267
+ s = Evoc::Scenario.new(params)
235
268
  begin
236
- result = q.call(evaluators: self.opts[:evaluators]).to_json
237
- $stdout.puts result
269
+ Evoc::RecommendationCache.get_recommendation(algorithm: s.algorithm,
270
+ query: s.query,
271
+ model_start: s.model_start,
272
+ model_end: s.model_end,
273
+ max_size: s.max_size,
274
+ aggregator: s.aggregator,
275
+ measures: s.measures)
276
+ Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], top_k: self.opts[:top_k], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
277
+ result = Evoc::RecommendationCache.to_h(measures: s.measures)
278
+ # merge scenario params with result hash and dump as json
279
+ $stdout.puts s.to_h.merge(result).to_json
238
280
  rescue ArgumentError => e
239
281
  invalid_configuration += 1
282
+ last_error = e.message
240
283
  end
241
284
  current_scenario += 1
242
285
  end
@@ -9,7 +9,7 @@ module Evoc
9
9
  # time: the time it took to generate the currently cached recommendation
10
10
  # model_size: the number of transactions used when generating the currently cached recommendation
11
11
  class << self
12
- attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :time_evaluation, :filtered_model_size, :evaluation
12
+ attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
13
13
  end
14
14
 
15
15
  def self.recommendation_cached?(algorithm:,
@@ -20,6 +20,7 @@ module Evoc
20
20
  return self.tag == [algorithm,query,model_start,model_end,max_size].hash
21
21
  end
22
22
 
23
+
23
24
  def self.get_recommendation(algorithm:,
24
25
  query:,
25
26
  model_start:,
@@ -75,14 +76,13 @@ module Evoc
75
76
  # @param [Array<String>] measure_combinations the list of measures to use when sorting a recommendation before evaluating
76
77
  #
77
78
  # @return [Hash[aggregator][evaluator][result]] the hash of results
78
- def self.evaluate(evaluators: ,expected_outcome:,measure_combination: )
79
+ def self.evaluate_last(evaluators: ,top_k: nil, unique_consequents: nil,expected_outcome:,measure_combination: )
79
80
  if !self.last_recommendation.nil?
80
- t1 = Time.new
81
- evaluators.each do |evaluator|
82
- self.evaluation[evaluator] = self.last_recommendation.evaluate_with(evaluator: evaluator,expected_outcome: expected_outcome,measure_combination: measure_combination)
83
- end
84
- t2 = Time.new
85
- self.time_evaluation = TimeDifference.between(t1,t2).in_seconds.round(8)
81
+ self.evaluation = self.last_recommendation.evaluate_with(evaluators: evaluators,
82
+ top_k: top_k,
83
+ unique_consequents: unique_consequents,
84
+ expected_outcome: expected_outcome,
85
+ measure_combination: measure_combination)
86
86
  else
87
87
  STDERR.puts "TAG = #{self.tag}No recommendation to evaluate"
88
88
  end
@@ -94,10 +94,7 @@ module Evoc
94
94
  # time: 'execution time',
95
95
  # filtered_model_size:
96
96
  # number_of_rules :
97
- # evaluation: {
98
- # average_precision: ..,
99
- # ..next evaluator..
100
- # }
97
+ # average_precision:
101
98
  # rules: [
102
99
  # {
103
100
  # lhs: [lhs]
@@ -118,7 +115,6 @@ module Evoc
118
115
  recommendation_hash[:time_rulegeneration] = self.time_rulegeneration
119
116
  recommendation_hash[:time_measurecalculation] = self.time_measurecalculation
120
117
  recommendation_hash[:time_aggregation] = self.time_aggregation
121
- recommendation_hash[:time_evaluation] = self.time_evaluation
122
118
  recommendation_hash[:filtered_model_size] = self.filtered_model_size
123
119
  recommendation_hash[:number_of_baserules] = self.base_recommendation.size
124
120
  recommendation_hash[:number_of_rules] = self.last_recommendation.size
@@ -128,10 +124,11 @@ module Evoc
128
124
  sum + r.get_measure('m_hyper_coefficient').value } / self.last_recommendation.size
129
125
  recommendation_hash[:largest_antecedent] = self.last_recommendation.largest_antecedent
130
126
  if !self.evaluation.nil?
131
- self.evaluation.each do |evaluator,value|
132
- recommendation_hash[evaluator.to_sym] = value[:value]
133
- recommendation_hash[:unique_consequents] = value[:unique_consequents]
134
- end
127
+ self.evaluation.each do |evaluator,results|
128
+ recommendation_hash[evaluator] = results['value']
129
+ # time can also be added like this:
130
+ # recommendation_hash[evaluator+'_time'] = results['time']
131
+ end
135
132
  end
136
133
  recommendation_hash[:rules] = []
137
134
  self.last_recommendation.each do |rule|
data/lib/evoc/rule.rb CHANGED
@@ -21,15 +21,19 @@ module Evoc
21
21
  name
22
22
  end
23
23
 
24
+ def human_name
25
+ "#{human_lhs} -> #{human_rhs}"
26
+ end
27
+
24
28
  def human_lhs
25
- if !self.tx_store.nil? & self.lhs.all? {|i| i.is_a?(Numeric)}
26
- self.lhs.map{|i| self.tx_store.int_2_name[i]}
29
+ if !self.tx_store.nil? # & self.lhs.all? {|i| i.is_a?(Numeric)}
30
+ self.tx_store.ints2names(self.lhs.map(&:to_i)).join(',')
27
31
  end
28
32
  end
29
33
 
30
34
  def human_rhs
31
- if !self.tx_store.nil? & self.rhs.all? {|i| i.is_a?(Numeric)}
32
- self.rhs.map{|i| self.tx_store.int_2_name[i]}
35
+ if !self.tx_store.nil? #& self.rhs.all? {|i| i.is_a?(Numeric)}
36
+ self.tx_store.ints2names(self.rhs.map(&:to_i)).join(',')
33
37
  end
34
38
  end
35
39
 
@@ -116,6 +116,33 @@ module Evoc
116
116
  end
117
117
 
118
118
 
119
+ # Needed by Evaluate mixin
120
+ def evaluation_format(measures:, expected_outcome:)
121
+ current_weight = nil
122
+ current_group = []
123
+ recommendation = []
124
+ # sort and filter out duplicate consequents
125
+ self.sort_on(measures: measures, rules: self.unique_by(measures.first)).each do |r|
126
+ expected = ((r.rhs - expected_outcome).empty? ? 1 : 0)
127
+ weight_tag = measures.map {|m| r.get_measure(m).value.nil? ? "INF" : r.get_measure(m).to_s}.join('_')
128
+ if current_weight.nil?
129
+ current_weight = weight_tag
130
+ end
131
+ if weight_tag == current_weight
132
+ current_group << expected
133
+ else
134
+ recommendation << current_group
135
+ current_group = [expected]
136
+ current_weight = weight_tag
137
+ end
138
+ end
139
+ # add last group if not empty
140
+ if !current_group.empty?
141
+ recommendation << current_group
142
+ end
143
+ return recommendation
144
+ end
145
+
119
146
  ##
120
147
  # Evaluate this recommendation using the given evaluator
121
148
  #
@@ -126,27 +153,35 @@ module Evoc
126
153
  # @param [String] evaluator the method to use for evaluating
127
154
  # @param [Array] expected_outcome the list of items to evaluate against
128
155
  # @param [Array] measure_combination the list of measures used to first sort the recommendation
129
- def evaluate_with(evaluator: :average_precision,expected_outcome:,measure_combination: )
156
+ def evaluate_with(evaluators:,expected_outcome:,measure_combination:,top_k: nil,unique_consequents: nil)
130
157
  if measure_combination.empty? then raise ArgumentError, "Cannot evalute a recommendation without specifying which measures to rank on" end
131
- result = Hash.new
132
- logger.debug "#{__method__} params: evaluator: #{evaluator}, measure_combination: #{measure_combination}"
158
+ logger.debug "#{__method__} params: evaluators: #{evaluators}, measure_combination: #{measure_combination}"
133
159
  # sort the rules on each combination and evaluate
134
- sorted_rules = []
135
- if self.aggregator =~ /_hc\z/
136
- sorted_rules = self.sort_on(measures: measure_combination+['m_hyper_coefficient'])
137
- elsif !self.aggregator.nil?
138
- sorted_rules = self.sort_on(measures: measure_combination)
139
- else
140
- # not an aggregation
141
- # get the strongest unique rules
142
- unique_rules = self.unique_by(measure_combination.first)
143
- sorted_rules = self.sort_on(rules: unique_rules,measures: measure_combination)
144
- end
145
- # get the recommended items
146
- recommendation = sorted_rules.map(&:rhs)
160
+ # if !top_k.nil?
161
+ # raise ArgumentError, "Top K must be a number" unless top_k.is_a?(Numeric)
162
+ # sorted_rules = sorted_rules.take(top_k)
163
+ # end
164
+ # convert rules into format used in evaluation
165
+ # map to 0/1 list where 1 is a correct item and 0 is not
166
+ # second item in each tuple gives the weight of the rule
147
167
  # evaluate the sorted list against the expected outcome
148
- result[:value] = Evoc::Evaluate.execute(recommendation,expected_outcome,evaluator)
149
- return result
168
+ recommendation = self.evaluation_format(measures: measure_combination, expected_outcome: expected_outcome)
169
+ potential_params = {rec: recommendation, exp: expected_outcome.size, rules: self}
170
+ results = Hash.new
171
+ evaluators.each do |evaluator|
172
+ t1 = Time.new
173
+ if Evoc::Evaluate.respond_to?(evaluator)
174
+ results[evaluator] = Hash.new
175
+ method_params = Evoc::Evaluate.method(evaluator).parameters.map(&:second)
176
+ params = potential_params.select {|k,v| method_params.include?(k)}
177
+ results[evaluator]['value'] = Evoc::Evaluate.method(evaluator).call(params)
178
+ else
179
+ raise NoMethodError, "The evaluator you requested (#{evaluator}) has not been implemented in Evoc::Evaluate"
180
+ end
181
+ t2 = Time.new
182
+ results[evaluator]['time'] = TimeDifference.between(t1,t2).in_seconds.round(8)
183
+ end
184
+ return results
150
185
  end
151
186
 
152
187
  ##
@@ -163,9 +198,9 @@ module Evoc
163
198
  # where each consequent is the strongest given by the input measure
164
199
  #
165
200
  # @param: [String] measure the measure used to find the strongest rules
166
- def unique_by(measure)
201
+ def unique_by(measure, rules: self)
167
202
  selected_rules = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new
168
- self.each do |rule|
203
+ rules.each do |rule|
169
204
  if !rule.get_measure(measure).value.nil?
170
205
  key = rule.rhs.first
171
206
  if selected_rules[key].nil?
@@ -223,7 +258,7 @@ module Evoc
223
258
  csv << ['rule'] + defined_measures
224
259
  self.each do |rule|
225
260
  row = CSV::Row.new([],[],false)
226
- row << rule.name
261
+ row << rule.human_name
227
262
  defined_measures.each do |m|
228
263
  row << rule.get_measure(m).value
229
264
  end
@@ -287,11 +322,9 @@ module Evoc
287
322
  end
288
323
 
289
324
  def ==other
290
- ( self.map(&:lhs) == other.map(&:lhs) ) &
291
- ( self.map(&:rhs) == other.map(&:rhs) ) &
292
- Evoc::Rule.measures.each do |m|
293
- self.map {|r| r.get_measure(m)} == other.map {|r| r.get_measure(m)}
294
- end
325
+ self_rules = self.rules.sort_by {|r| r.name}.map {|r| "#{r.name}#{r.instantiated_measures.map {|m| r.get_measure(m).value}}"}
326
+ other_rules = other.rules.sort_by {|r| r.name}.map {|r| "#{r.name}#{r.instantiated_measures.map {|m| r.get_measure(m).value}}"}
327
+ self_rules == other_rules
295
328
  end
296
329
 
297
330
  def size
data/lib/evoc/scenario.rb CHANGED
@@ -53,30 +53,6 @@ module Evoc
53
53
  comparison
54
54
  end
55
55
 
56
- ##
57
- # Executes a query given the current paramaters
58
- # This results in a set of association rules, i.e., a recommendation
59
- #
60
- # Producing a recommendation is done through the following process:
61
- #
62
- # 1. Generate rules using a mining algorithm on the specified history
63
- # 2. Calculate interestingness measures on the generated rules
64
- # (optional) 3. Aggregate rules to further improve recommendation
65
- # (optional) 4. Evaluate how good the recommendation is
66
- #
67
- # @return [Hash] containing the query + scenario + recommendation + other metadata
68
- def call(evaluators: [])
69
- #generate recommendation in cache (generate rules + measures on rules)
70
- self.recommendation
71
-
72
- # evaluate if requested
73
- if !evaluators.empty?
74
- Evoc::RecommendationCache.evaluate(evaluators: evaluators,expected_outcome: self.expected_outcome,measure_combination: self.measures)
75
- end
76
- # build return hash
77
- recommendation = Evoc::RecommendationCache.to_h(measures: self.measures)
78
- return self.to_h.merge(recommendation)
79
- end
80
56
 
81
57
  def to_h
82
58
  fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
@@ -88,16 +64,6 @@ module Evoc
88
64
  return hash
89
65
  end
90
66
 
91
- def recommendation
92
- Evoc::RecommendationCache.get_recommendation(algorithm: self.algorithm,
93
- query: self.query,
94
- model_start: self.model_start,
95
- model_end: self.model_end,
96
- max_size: self.max_size,
97
- aggregator: self.aggregator,
98
- measures: self.measures)
99
- end
100
-
101
67
  def recommendation?
102
68
  Evoc::RecommendationCache.recommendation_cached?(algorithm: self.algorithm,
103
69
  query: self.query,
data/lib/evoc/tree/tree_node.rb ADDED
@@ -0,0 +1,26 @@
1
+ # Extending the rubytree gem with some additional methods
2
+ # see: http://rubytree.anupamsg.me/
3
+ module Tree
4
+ class TreeNode
5
+
6
+ ##
7
+ # @return the right siblings of the current node
8
+ def right_siblings
9
+ if self.is_last_sibling?
10
+ return []
11
+ else
12
+ return [self.next_sibling] + self.next_sibling.right_siblings
13
+ end
14
+ end
15
+
16
+ ##
17
+ # @return the left siblings of the current node
18
+ def left_siblings
19
+ if self.is_first_sibling?
20
+ return []
21
+ else
22
+ return [self.previous_sibling] + self.previous_sibling.left_siblings
23
+ end
24
+ end
25
+ end
26
+ end
data/lib/evoc/tx_store.rb CHANGED
@@ -26,6 +26,14 @@ module Evoc
26
26
  end
27
27
 
28
28
 
29
+ def names2ints(names)
30
+ names.map {|n| self.name_2_int[n]}
31
+ end
32
+
33
+ def ints2names(ints)
34
+ ints.map {|i| self.int_2_name[i]}
35
+ end
36
+
29
37
  ##
30
38
  # self << tx
31
39
  #
data/lib/evoc/util.rb ADDED
@@ -0,0 +1,37 @@
1
+
2
+ module Evoc
3
+ module Util
4
+ # helper function to generate a lattice so we can easily come up with tests for the closed rules mining
5
+ # examples nodes: [['a',[1,2]],['b',[2,3]],['c',[1,2,3]]]
6
+ # first elem is item name
7
+ # second elem is the txes where this item changes
8
+ def self.lattice(nodes,filter: nil)
9
+ (1..nodes.size).each do |n|
10
+ nodes.combination(n).each do |comb|
11
+ # [['a',[1,2]],['b',[2,3]]]
12
+ union = comb.map(&:first).join(',')
13
+ frequency = comb.map(&:second).inject(&:&).size
14
+ if filter =~ union
15
+ if frequency > 0
16
+ printf("%#{nodes.size*2}s",[union,frequency].join(':'))
17
+ end
18
+ end
19
+ end
20
+ puts
21
+ end
22
+ end
23
+
24
+ # helper function for generating a txstore from the following format
25
+ # [['a',[1,2]],['b',[2,3]],['c',[1,2,3]]]
26
+ # (same structure as used for lattice creation)
27
+ def self.nodes2txstore(nodes)
28
+ txes = nodes.map(&:second).inject(&:|)
29
+ store = Evoc::TxStore.new
30
+ txes.each do |id|
31
+ items = nodes.select {|n| n.second.include?(id)}.map(&:first)
32
+ store << Evoc::Tx.new(id: id, items: items)
33
+ end
34
+ return(store)
35
+ end
36
+ end
37
+ end
data/lib/evoc/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Evoc
2
- VERSION = "3.6.2"
2
+ VERSION = "3.7.0"
3
3
  end
data/lib/evoc_cli/experiment.rb CHANGED
@@ -1,10 +1,20 @@
1
1
  require_relative 'cli_helper'
2
+ # override printing of help text as the default does not respect spaces and adds newlines
3
+ class Thor
4
+ module Shell
5
+ class Basic
6
+ def print_wrapped(message, options = {})
7
+ stdout.puts message
8
+ end
9
+ end
10
+ end
11
+ end
2
12
 
3
13
  module EvocCLI
4
14
  class Experiment < Thor
5
15
  class_option :case_id, type: :string, desc: "Specify case identifier."
6
16
  class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
7
- class_option :transactions, :aliases => '-t', :type => :string, :required => true, :desc => "Path to change-history"
17
+ class_option :transactions, :aliases => '-t', :type => :string, :desc => "Path to change-history"
8
18
  class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
9
19
 
10
20
  ##
@@ -35,6 +45,7 @@ module EvocCLI
35
45
  desc: "Percentage of items to select for each query"
36
46
  method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
37
47
  method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
48
+ method_option :write_dict, type: :string, desc: "Write an item dictionary to the provided file"
38
49
  desc "generate_queries [options]", "Generate queries from <transactions>"
39
50
  def generate_queries
40
51
  #MemoryProfiler.start('create_queries',30)
@@ -59,7 +70,52 @@ module EvocCLI
59
70
  desc: "DEPRECATED WILL HAVE NO EFFECT Number of query permutations/replications to produce."
60
71
  method_option :fail_safe, type: :string, desc: "If the fail safe file exists, safely exit."
61
72
  method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
73
+ method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
74
+ method_option :top_k, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER an evential unique consequents filter"
62
75
  desc "execute_scenarios [options]",""
76
+ long_desc <<-LONGDESC
77
+ keyword description
78
+ ------- -----------
79
+
80
+ case_id: user provided tag for the history used
81
+ granularity: granularity of the history used
82
+ scenario_id: a unique indentifier for this scenario
83
+ tx_id: the sha of the commit that the query was sampled from
84
+ tx_index: the index of this transaction in the used history (0 is oldest)
85
+ tx_size: the number of items in the transaction
86
+ query_size: the number of items in the query
87
+ query_percentage: query_size/tx_size
88
+ expected_outcome_size: tx - query
89
+ model_size: number of previous transactions relative to this one
90
+ model_hours: time span from the first transaction to this one
91
+ model_age: number of transactions between end of model and this transaction
92
+ max_size: transactions larger than this are filtered out before generating rules
93
+ filtered_model_size: model size after the max_size filtering
94
+ algorithm: the mining algorithm used to generate the recommendation
95
+ aggregator: the aggregation function used to aggregate the rules of the recommendation
96
+ measures: the interestingnessmeasures used to rank each rule
97
+ recommendation_tag: a unique identifiter of the rules used as a basis for the recommendation
98
+ time_rulegeneration: how long it took to generate the rules
99
+ time_measurecalculation: how long it took to calculate the measures for each rule
100
+ time_aggregation: how long it took to aggregate the rules
101
+ number_of_baserules: number of rules before aggregation
102
+ number_of_rules: number of rules after aggregation (equal to number_of_baserules when not aggregating)
103
+ number_of_hyperrules: number of hyper rules after aggregating
104
+ mean_hyper_coefficient: average number of rules aggregated in each hyper rule
105
+ largest_antecedent: number of items in the largest antecedent (lhs of rule)
106
+ t_ap: average precision where ties are accounted for
107
+ ap: the average precision
108
+ precision: ratio of correct to incorrect items
109
+ precision10: ratio of correct to incorrect items in the top 10
110
+ recall: ratio of correct items in recommendation to full set of expected items
111
+ recall19: ratio of correct items in recommendation to full set of expected items in the top 10
112
+ mean_confidence: the average confidence of the rules in this recommendation
113
+ discernibility: the number of uniquely weighted rules to the number of rules
114
+ applicable: 1 if rules were generated, 0 otherwise
115
+ f1: the f1 measure
116
+ first_relevant: the rank of the first correct item
117
+ last_relevant: the rank of the last correct item
118
+ LONGDESC
63
119
  def execute_scenarios
64
120
  if !options[:permutation].nil?
65
121
  STDERR.puts "Permutation option has been set, but the option is currently disabled and will have no effect"
data/lib/evoc_helper.rb CHANGED
@@ -28,7 +28,10 @@ require 'logger'
28
28
  require 'zip'
29
29
  require 'zip/filesystem'
30
30
  require 'set'
31
+ require 'tempfile'
32
+ require 'fileutils'
31
33
  require 'algorithms' # various efficient data structures
34
+ require 'tree' #general purpose tree structure
32
35
  require 'mathn' # enhances the Rational (and others) number type
33
36
  Evoc::Env.load('google_hash',"please install to improve performance")
34
37
  #Evoc::Env.load('nmatrix')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: evoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.6.2
4
+ version: 3.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Rolfsnes
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-20 00:00:00.000000000 Z
11
+ date: 2017-01-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -150,6 +150,20 @@ dependencies:
150
150
  - - ">="
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rubytree
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
153
167
  description:
154
168
  email:
155
169
  - mail@thomasrolfsnes.com
@@ -172,12 +186,15 @@ files:
172
186
  - evoc.gemspec
173
187
  - lib/evoc.rb
174
188
  - lib/evoc/algorithm.rb
189
+ - lib/evoc/algorithms/closed_rules.rb
175
190
  - lib/evoc/algorithms/top_k.rb
176
191
  - lib/evoc/analyze.rb
177
192
  - lib/evoc/array.rb
178
193
  - lib/evoc/evaluate.rb
179
194
  - lib/evoc/exceptions/aggregation_error.rb
195
+ - lib/evoc/exceptions/configuration_error.rb
180
196
  - lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb
197
+ - lib/evoc/exceptions/format_error.rb
181
198
  - lib/evoc/exceptions/measure_calculation_error.rb
182
199
  - lib/evoc/exceptions/no_changed_items_in_changes.rb
183
200
  - lib/evoc/exceptions/no_changes_in_json_object.rb
@@ -206,8 +223,10 @@ files:
206
223
  - lib/evoc/rule_store.rb
207
224
  - lib/evoc/scenario.rb
208
225
  - lib/evoc/svd.rb
226
+ - lib/evoc/tree/tree_node.rb
209
227
  - lib/evoc/tx.rb
210
228
  - lib/evoc/tx_store.rb
229
+ - lib/evoc/util.rb
211
230
  - lib/evoc/version.rb
212
231
  - lib/evoc_cli/analyze.rb
213
232
  - lib/evoc_cli/cli_helper.rb