evoc 3.6.2 → 3.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8945b15a68ce28d5c3c9e8b10b2478f3fcbb139f
4
- data.tar.gz: 597aaf2639f69c61788312c030c76706e419b0bf
3
+ metadata.gz: 2b867c7a5e05b3c2be58b9dd361554a43f3f277c
4
+ data.tar.gz: 9d00091a6fd7685f048930889aaf252df5aa6634
5
5
  SHA512:
6
- metadata.gz: 804e7b6ec7b27c35b09697a44f5f20895efe94a31631c8068ba83861d69856e948820056b22571b10bd47ce09a2104dd73f21cd18df0bf49619df23813425d61
7
- data.tar.gz: 9ad3251c853afb583f0f257cec83cb09959aab339140cea5a7c032d48ff7e023a80f8239ae6b269d0357aafa0e8757c6912f6814fe8b85e51e285dd85e83e44d
6
+ metadata.gz: f89dbef20f735e0f8c6b8f7104ea8b38a4dd118707089b17acdf529591fd652478406047cb5d4b08e18afb746d3edf46dc7cf9ac7eb208d214f91e9050841c12
7
+ data.tar.gz: 7e1955af5653df5d7afd986e9d522178b32d10c26771833e4d37aec910dd93208de56c8a6f83bd21faeb8796b098da808f72a05c9f6e3caf1100064fd20d7bb0
data/.gitignore CHANGED
@@ -13,3 +13,5 @@ evoc.log
13
13
  /.history
14
14
  /GRTAGS
15
15
  *TAGS
16
+ *~
17
+ *#
data/evoc.gemspec CHANGED
@@ -27,4 +27,5 @@ Gem::Specification.new do |spec|
27
27
  spec.add_runtime_dependency "ruby-progressbar"
28
28
  spec.add_runtime_dependency "rubyzip"
29
29
  spec.add_runtime_dependency "algorithms"
30
+ spec.add_runtime_dependency "rubytree"
30
31
  end
@@ -20,8 +20,8 @@ module Evoc
20
20
  Evoc::Algorithm.cached_rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
21
21
  elsif match = /rule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
22
22
  Evoc::Algorithm.rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
23
- elsif Evoc::Algorithm.respond_to?(algorithm+'_algorithm')
24
- Evoc::Algorithm.method(algorithm+'_algorithm').call(tx_store:tx_store,query:query)
23
+ elsif Evoc::Algorithm.respond_to?(algorithm)
24
+ Evoc::Algorithm.method(algorithm).call(tx_store:tx_store,query:query)
25
25
  else raise ArgumentError.new, "#{algorithm} is not an available algorithm"
26
26
  end
27
27
  end
@@ -105,6 +105,70 @@ module Evoc
105
105
  end
106
106
 
107
107
 
108
+ def self.not_subsumed(tx_store:, query:)
109
+ #initial filter, we consider all txes where something in the query changed
110
+ query_changed_in = tx_store.transactions_of_list(query)
111
+ # now find what subsets of the query changed in each tx
112
+ trie = Containers::Trie.new
113
+ query_changed_in.each do |tx_id|
114
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
115
+ antecedent = (query & tx.items)
116
+ consequents = (tx.items - antecedent)
117
+ if consequents.size != 0
118
+ consequents.each do |consequent|
119
+ entry = "#{consequent.to_s}#{antecedent.join('')}"
120
+ if trie.get(entry).nil?
121
+ puts "ADDED #{entry}"
122
+ trie.push(entry,consequent.to_s)
123
+ end
124
+ end
125
+ end
126
+ end
127
+ return trie
128
+ # now generate rules
129
+ # rule_store = Evoc::RuleStore.new(query: query)
130
+ # rules.each do |consequent,antecedents|
131
+ # antecedents.each do |antecedent|
132
+ # rule_store << Evoc::Rule.new(lhs: antecedent,rhs: consequent,tx_store:tx_store)
133
+ # end
134
+ # end
135
+ # return rule_store
136
+ end
137
+
138
+ ##
139
+ # Find the largest rules for each unique consequent
140
+ def self.largest_rules(tx_store:,query:)
141
+ #initial filter, we consider all txes where something in the query changed
142
+ query_changed_in = tx_store.transactions_of_list(query)
143
+ # now find what subsets of the query changed in each tx
144
+ rules = Hash.new
145
+ query_changed_in.each do |tx_id|
146
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
147
+ antecedent = (query & tx.items)
148
+ consequents = (tx.items - antecedent)
149
+ if consequents.size != 0
150
+ consequents.each do |consequent|
151
+ if rules[consequent].nil?
152
+ rules[consequent] = Set.new([antecedent]) # new consequent
153
+ elsif antecedent.size > rules[consequent].first.size # larger antecedent
154
+ rules[consequent] = Set.new([antecedent])
155
+ elsif antecedent.size == rules[consequent].first.size # equally large antecedent
156
+ rules[consequent] << antecedent
157
+ end
158
+ end
159
+ end
160
+ end
161
+ # now generate rules
162
+ rule_store = Evoc::RuleStore.new(query: query)
163
+ rules.each do |consequent,antecedents|
164
+ antecedents.each do |antecedent|
165
+ rule_store << Evoc::Rule.new(lhs: antecedent,rhs: consequent,tx_store:tx_store)
166
+ end
167
+ end
168
+ return rule_store
169
+ end
170
+
171
+
108
172
  ##
109
173
  # TARMAQ
110
174
  # find largest subsets in @query with evidence in @tx_store version
@@ -113,7 +177,6 @@ module Evoc
113
177
  #initial filter, we consider all txes where something in the query changed
114
178
  query_changed_in = tx_store.transactions_of_list(query)
115
179
  # now find what subsets of the query changed in each tx
116
- rules = Hash.new
117
180
  query_changed_in.each do |tx_id|
118
181
  tx = tx_store.get_tx(id:tx_id,id_type: :index)
119
182
  largest_match_in_query = (query & tx.items)
@@ -134,14 +197,18 @@ module Evoc
134
197
  ###
135
198
  ## rose
136
199
  ###
137
- def self.rose_algorithm(tx_store:,query:)
200
+ def self.rose(tx_store:,query:)
138
201
  qs = query.size
139
202
  self.cached_rule_range(qs,qs,tx_store: tx_store, query: query)
140
203
  end
141
204
 
142
- def self.co_change_algorithm(tx_store:, query:)
205
+ def self.co_change(tx_store:, query:)
143
206
  self.cached_rule_range(1,1,tx_store: tx_store, query: query)
144
207
  end
145
208
 
209
+ def self.closed_rules(tx_store:, query:)
210
+ Evoc::ClosedRules.closed_rules(tx_store: tx_store,query: query)
211
+ end
212
+
146
213
  end # Algorithm
147
214
  end
@@ -0,0 +1,145 @@
1
+ module Evoc
2
+ class ClosedRules
3
+ def self.closed_rules(tx_store:,query:)
4
+ # @@store = tx_store
5
+ # create initial trees, one tree per consequent
6
+ tree = self.initialize_tree(tx_store,query)
7
+ # puts "INIT TREE:"
8
+ # tree.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}"})
9
+ closed_rules = Evoc::RuleStore.new(query: query)
10
+ tree.children.each do |consequent|
11
+ self.extend_nodes(consequent).each do |frequency, closed_sets|
12
+ closed_sets.each do |closed_set|
13
+ antecedent = closed_set - consequent.name
14
+ closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
15
+ end
16
+ end
17
+ end
18
+ return closed_rules
19
+ end
20
+
21
+ private
22
+ def self.initialize_tree(tx_store, query)
23
+ tree = Tree::TreeNode.new([])
24
+ # find all items that changed with something in the query
25
+ query_changed_in = tx_store.transactions_of_list(query)
26
+ # store all items from the query that have changed with each consequent
27
+ query_changed_in.each do |tx_id|
28
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
29
+ antecedent = (query & tx.items)
30
+ consequents = (tx.items - antecedent)
31
+ if consequents.size != 0
32
+ consequents.each do |consequent|
33
+ consequent_key = [consequent.to_s]
34
+ if tree[consequent_key].nil?
35
+ # initialize candidates
36
+ tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
37
+ end
38
+ txes_consequent = tree[consequent_key].content
39
+ antecedent.each do |item|
40
+ union = [item.to_s,consequent.to_s]
41
+ if tree[consequent_key][union].nil?
42
+ txes_union = tx_store.transactions_of(item) & txes_consequent
43
+ tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
49
+ return(tree)
50
+ end
51
+
52
+ def self.extend_nodes(root,closed_rules: {})
53
+ current_node = root.first_child
54
+ while(!current_node.nil?) do
55
+ a = current_node
56
+ b = a.next_sibling
57
+ while(!b.nil?) do
58
+ # print "Checking #{@@store.ints2names(a.name.map(&:to_i))}:{#{a.content}} against #{@@store.ints2names(b.name.map(&:to_i))}:{#{b.content}}"
59
+ ab = a.name | b.name
60
+ a_txes = a.content
61
+ b_txes = b.content
62
+ ab_txes = a_txes & b_txes
63
+ # check properties
64
+ # 1. when txes are the same
65
+ # - remove B
66
+ # - replace all A with union of A and B
67
+ if ab_txes.size > 0
68
+ case self.compare(a_txes,b_txes)
69
+ when 'EQUAL'
70
+ # puts " EQUAL"
71
+ # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
72
+ # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
73
+ temp = b.previous_sibling
74
+ root.remove!(b)
75
+ b = temp
76
+ a.each {|n| n.rename(ab | n.name)}
77
+ when 'A_IN_B'
78
+ # puts " A in B"
79
+ # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
80
+ a.each {|n| n.rename(ab | n.name)}
81
+ when 'B_IN_A'
82
+ # puts " B in A"
83
+ # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
84
+ # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
85
+ temp = b.previous_sibling
86
+ root.remove!(b)
87
+ b = temp
88
+ a << Tree::TreeNode.new(ab,ab_txes)
89
+ when 'NOT_EQUAL'
90
+ # puts " NOT EQUAL"
91
+ # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
92
+ a << Tree::TreeNode.new(ab,ab_txes)
93
+ end
94
+ end
95
+ # puts "NEW TREE:"
96
+ # root.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}:#{node.content.size}"})
97
+ b = b.next_sibling
98
+ # puts "A siblings #{a.right_siblings.map(&:name).map {|n| @@store.ints2names(n.map(&:to_i))}}"
99
+ # puts "A next sibling #{@@store.ints2names(a.next_sibling.name.map(&:to_i))}}"
100
+ # puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
101
+ end # siblings.each
102
+ if !a.children.empty?
103
+ # puts "TRAVERSING DOWN"
104
+ self.extend_nodes(a, closed_rules: closed_rules)
105
+ end
106
+ # add node as closed rule if not subsumed by another rule already added
107
+ rule_frequency = a.content.size
108
+ rule = a.name
109
+ if closed_rules[rule_frequency].nil?
110
+ # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
111
+ closed_rules[rule_frequency] = [rule]
112
+ else
113
+ if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
114
+ # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
115
+ closed_rules[rule_frequency] << rule
116
+ else
117
+ # puts "RULE SUBSUMED, NOT ADDING: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
118
+ end
119
+ end
120
+ current_node = current_node.next_sibling
121
+ end # children.each
122
+ return(closed_rules)
123
+ end #extend_nodes
124
+
125
+ def self.compare(a,b)
126
+ if a == b
127
+ return 'EQUAL'
128
+ # 2. when A is a subset of B
129
+ # - replace all A with union of A and B
130
+ elsif (a - b).empty?
131
+ return 'A_IN_B'
132
+ # 2. when B is a subset of A
133
+ # - remove B
134
+ # - add the union as new child
135
+ elsif (b - a).empty?
136
+ return 'B_IN_A'
137
+ # 4. contain different elements
138
+ # - add the union as new child
139
+ else
140
+ return 'NOT_EQUAL'
141
+ end
142
+ end
143
+ end
144
+ end
145
+
data/lib/evoc/evaluate.rb CHANGED
@@ -1,42 +1,251 @@
1
1
  module Evoc
2
- class Evaluate
2
+ module Evaluate
3
3
  extend Logging
4
4
 
5
+ def self.validateInput(input)
6
+ # verify format
7
+ if !input.is_a?(Array) || # not an array
8
+ !input.first.is_a?(Array) || # not containg an array
9
+ ![0,1].include?(input.first.first) # items are not 0s and 1s
10
+ raise Evoc::Exceptions::FormatError.new "Wrong format given to #{__method__}, expected list of list of 0s and 1s, input was: #{input}"
11
+ end
12
+ end
5
13
 
6
- def self.execute(recommendation,expected_outcome,evaluator)
7
- if match = /average_precision(?<num>\d+)?/.match(evaluator)
8
- if match[:num].nil?
9
- self.average_precision(recommendation,expected_outcome)
10
- else
11
- self.average_precision(recommendation,expected_outcome,n: match[:num].to_i)
14
+ def self.mean_confidence(rules:)
15
+ if rules.empty? then return nil end
16
+ return (rules.inject(0) {|sum,r| sum + r.m_confidence.value}/rules.size).to_f
17
+ end
18
+
19
+ def self.discernibility(rec:)
20
+ # AP is 0 for the empty list
21
+ if rec.is_a?(Array) && rec.empty? # array and empty
22
+ return nil
23
+ end
24
+ self.validateInput(rec)
25
+
26
+ rec_size = 0
27
+ rec_clusters = 0
28
+
29
+ rec.each do |c|
30
+ rec_clusters = rec_clusters + 1
31
+ c.each do |e|
32
+ rec_size = rec_size + 1
12
33
  end
13
- elsif match = /top10_recall/.match(evaluator)
14
- self.top10_recall(recommendation,expected_outcome)
15
- else raise ArgumentError, "The evaluator you requested (#{evaluator}) has not been implemented in Evoc::Evaluate"
16
34
  end
35
+ return (rec_clusters/rec_size).to_f
36
+
17
37
  end
18
38
 
19
- # calculate the ratio of correct items in the top 10
20
- # @param [Array] recommendation a sorted array
21
- # @param [Array] expected_outcome an array of items
22
- # @return [Rational] the top10 recall
23
- def self.top10_recall(recommendation,expected_outcome)
24
- if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
25
- if (expected_outcome.size > 0) & !recommendation.empty?
26
- top10 = recommendation.take(10).flatten
27
- common_items = (expected_outcome & top10).size.to_r
28
- return common_items/expected_outcome.size
29
- else
30
- nil
39
+ def self.applicable(rec:)
40
+ if rec.is_a?(Array)
41
+ (rec <=> []).abs
42
+ else
43
+ raise Evoc::Exceptions::FormatError.new "Wrong format given to #{__method__}, expected an array, input was: #{input}"
44
+ end
45
+ end
46
+
47
+ ##
48
+ # @return the f1 score (precision/recall harmonic mean)
49
+ def self.f1(rec:,exp:)
50
+ # AP is 0 for the empty list
51
+ if rec.is_a?(Array) && rec.empty? # array and empty
52
+ return nil
53
+ end
54
+ self.validateInput(rec)
55
+
56
+ rec_size = 0
57
+ rec_correct = 0
58
+
59
+ rec.each do |c|
60
+ c.each do |e|
61
+ rec_size = rec_size + 1
62
+ rec_correct = rec_correct + e
63
+ end
64
+ end
65
+ return (2*rec_correct/(rec_size + exp)).to_f
66
+ end
67
+
68
+
69
+ ##
70
+ # @return the rank of the first relevant item
71
+ def self.first_relevant(rec:)
72
+ # AP is 0 for the empty list
73
+ if rec.is_a?(Array) && rec.empty? # array and empty
74
+ return nil
75
+ end
76
+ self.validateInput(rec)
77
+
78
+ last_checked = 1
79
+ rec.each do |c|
80
+ c.each do |e|
81
+ if e == 1
82
+ return last_checked
83
+ end
84
+ last_checked = last_checked + 1
31
85
  end
86
+ end
87
+ return nil
32
88
  end
33
89
 
90
+ ##
91
+ # @return the rank of the last relevant item
92
+ def self.last_relevant(rec:)
93
+ # AP is 0 for the empty list
94
+ if rec.is_a?(Array) && rec.empty? # array and empty
95
+ return nil
96
+ end
97
+ self.validateInput(rec)
98
+
99
+ size = rec.inject(0) {|sum,c| sum + c.size}
100
+ last_checked = size
101
+ rec.reverse_each do |c|
102
+ c.reverse_each do |e|
103
+ if e == 1
104
+ return last_checked
105
+ end
106
+ last_checked = last_checked - 1
107
+ end
108
+ end
109
+ return nil
110
+ end
111
+
112
+ def self.recall10(rec:,exp: nil)
113
+ # AP is 0 for the empty list
114
+ if rec.is_a?(Array) && rec.empty? # array and empty
115
+ return nil
116
+ end
117
+ self.validateInput(rec)
118
+ return self.recall(rec: [rec.take(10).flatten.take(10)],exp: exp)
119
+ end
120
+
121
+ def self.precision10(rec:,exp: nil)
122
+ # AP is 0 for the empty list
123
+ if rec.is_a?(Array) && rec.empty? # array and empty
124
+ return nil
125
+ end
126
+ self.validateInput(rec)
127
+ return self.precision(rec: [rec.take(10).flatten.take(10)])
128
+ end
129
+
130
+ def self.precision(rec:,exp: nil)
131
+ # AP is 0 for the empty list
132
+ if rec.is_a?(Array) && rec.empty? # array and empty
133
+ return nil
134
+ end
135
+ self.validateInput(rec)
136
+
137
+ size_rec = rec.inject(0) {|sum,c| sum + c.size}
138
+ num_correct_in_rec = rec.inject(0) {|sum,c| sum + c.inject(&:+)}
139
+
140
+ return (num_correct_in_rec/size_rec).to_f
141
+ end
142
+
143
+ def self.recall(rec:,exp: nil)
144
+ # AP is 0 for the empty list
145
+ if rec.is_a?(Array) && rec.empty? # array and empty
146
+ return nil
147
+ end
148
+ self.validateInput(rec)
149
+
150
+ num_correct_in_rec = rec.inject(0) {|sum,c| sum + c.inject(&:+)}
151
+
152
+ if exp.nil?
153
+ return num_correct_in_rec
154
+ else
155
+ if num_correct_in_rec > exp
156
+ raise ArgumentError, "Found more relevant items than the provided number of relevant items"
157
+ end
158
+ return (num_correct_in_rec/exp).to_f
159
+ end
160
+ end
161
+ # clustered recommendation is expected to be a sorted list V
162
+ # where V = [V1,V2,..Vn]
163
+ # and Vi is a cluster of items with the same weight like [rel_1,rel_2,..,rel_n]
164
+ # where rel_i is 1 if the item is relevant and 0 if not
165
+
166
+ # r_p : relevant items in previous groups
167
+ # i_p : index previous group
168
+ # r_g : relevant items in group
169
+ # n_g : items in group
170
+ # i : index of current item
171
+ def self.t_ap(rec:,exp: nil)
172
+ # AP is 0 for the empty list
173
+ if rec.is_a?(Array) && rec.empty? # array and empty
174
+ return nil
175
+ end
176
+ self.validateInput(rec)
177
+
178
+ ap = 0
179
+ r_p = 0
180
+ i_p = 0
181
+ rec.each do |cluster|
182
+ r_g = cluster.inject(&:+).to_r
183
+ n_g = cluster.size.to_r
184
+ cluster.each_with_index do |_,i|
185
+ i = i_p + i + 1
186
+ chance_relevant = r_g/n_g
187
+ avg_previous_rel = if (n_g == 1)
188
+ (r_p + 1) * (1/i)
189
+ else
190
+ (r_p + (i - i_p - 1)*((r_g-1)/(n_g-1)) + 1) * (1/i)
191
+ end
192
+
193
+ item_ap_contribution = chance_relevant * avg_previous_rel
194
+
195
+ ap = ap + item_ap_contribution
196
+ end
197
+ r_p = r_p + r_g
198
+ i_p = i_p + n_g
199
+ end
200
+ # if the number of relevant documents is not supplied
201
+ # assume that the recommendation contains all relevant documents
202
+ if exp.nil?
203
+ exp = r_p
204
+ else
205
+ if r_p > exp
206
+ raise ArgumentError, "Found more relevant items than the provided number of relevant items"
207
+ end
208
+ end
209
+ return (r_p == 0 ? 0 : (ap/exp).to_f)
210
+ end
211
+
212
+ def self.ap(rec:,exp: nil)
213
+ # AP is 0 for the empty list
214
+ if rec.is_a?(Array) && rec.empty? # array and empty
215
+ return nil
216
+ end
217
+ self.validateInput(rec)
218
+
219
+ i = 0
220
+ correct_i = 0
221
+ ap = 0
222
+
223
+ rec.each do |cluster|
224
+ cluster.each do |item|
225
+ i = i + 1
226
+ correct_i = correct_i + item
227
+ precision_i = correct_i/i
228
+ ap = ap + (precision_i*item)
229
+ end
230
+ end
231
+
232
+ if exp.nil?
233
+ exp = correct_i
234
+ else
235
+ if correct_i > exp
236
+ raise ArgumentError, "Found more relevant items than the provided number of relevant items"
237
+ end
238
+ end
239
+ return (exp == 0 ? 0 : (ap/exp).to_f)
240
+
241
+ end
34
242
  ##
35
243
  # calculate the average precision of the result based on an expected outcome
36
244
  # @param [Array] recommendation a sorted array
37
245
  # @param [Array] expected_outcome an array of items
38
246
  # @return [Float] the average precision
39
- def self.average_precision(recommendation,expected_outcome, n: recommendation.size)
247
+ def self.average_precision(recommendation,expected_outcome)
248
+ raise Error.new "#average_precision has been deprecated, use #ap instead"
40
249
  if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
41
250
  if (expected_outcome.size > 0) & !recommendation.empty?
42
251
  average_precision = 0
@@ -45,7 +254,7 @@ module Evoc
45
254
  # sort rules by weight
46
255
  # we first group rules with equal weights
47
256
  # and then sort the groups by weight
48
- recommendation.take(n).each do |items|
257
+ recommendation.each do |items|
49
258
  if !items.is_a?(Array) then items = [items] end
50
259
  if items.first.class != expected_outcome.first.class
51
260
  raise ArgumentError, "Expected outcome was of type #{expected_outcome.first.class}, while the item in the recommendation was of type #{items.first.class}"
@@ -71,39 +280,5 @@ module Evoc
71
280
  nil
72
281
  end
73
282
  end
74
-
75
- # calculate the grouped average precision of the result based on an expected outcome
76
- def self.e_collected_average_precision(expected_outcome)
77
- if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
78
- if (expected_outcome.size > 0) & !self.empty?
79
- collected_average_precision = 0
80
- correct_items = []
81
- total_items_considered = []
82
- # sort rules by weight
83
- # we first group rules with equal weights
84
- # and then sort the groups by weight
85
- groups = self.group_by {|r| r.weight}.sort.reverse
86
- groups.each do |(_,rules)|
87
- items = rules.map(&:rhs).flatten.uniq
88
- if (new_items = items - total_items_considered).size > 0
89
- new_items.each {|item| total_items_considered << item}
90
- if correct_in_group = (items & expected_outcome)
91
- if correct_in_group.size > 0
92
- # make sure that the new items havent already been added earlier
93
- new_correct = (correct_in_group - correct_items)
94
- # add new items
95
- new_correct.each {|item| correct_items << item}
96
- change_in_recall = new_correct.size.to_r/expected_outcome.size
97
- precision_at_k = correct_items.size.to_r/total_items_considered.size
98
- collected_average_precision += (precision_at_k * change_in_recall)
99
- end
100
- end
101
- end
102
- end
103
- self.collected_average_precision = collected_average_precision.to_f
104
- else
105
- self.collected_average_precision = nil
106
- end
107
- end
108
283
  end
109
284
  end
@@ -0,0 +1,6 @@
1
+ module Evoc
2
+ module Exceptions
3
+ class ConfigurationError < StandardError
4
+ end
5
+ end
6
+ end
@@ -0,0 +1,6 @@
1
+ module Evoc
2
+ module Exceptions
3
+ class FormatError < StandardError
4
+ end
5
+ end
6
+ end
@@ -78,6 +78,23 @@ module Evoc
78
78
  # tx_id, query
79
79
  #
80
80
  def generate_queries
81
+ ##
82
+ # write dict
83
+ ##
84
+ if path = self.opts[:write_dict]
85
+ tmp = Tempfile.new('dict')
86
+ begin
87
+ tmp.puts("id,name")
88
+ Evoc::HistoryStore.base_history.int_2_name.each do |id,name|
89
+ tmp.puts("#{id},#{name}")
90
+ end
91
+ tmp.close
92
+ FileUtils.mv(tmp.path,path)
93
+ ensure
94
+ tmp.close
95
+ tmp.unlink
96
+ end
97
+ end
81
98
  ##
82
99
  # WRITE CSV HEADER
83
100
  CSV {|row| row << %W(tx_id query)}
@@ -124,7 +141,7 @@ module Evoc
124
141
  # 2. randomly select X in specified = Y
125
142
  # 3. randomly select Y in tx
126
143
  elsif !random_sizes.empty? & !specified_sizes.empty?
127
- specified_sizes.select! {|s| (s < tx_size) & (s > 1)} #1.
144
+ specified_sizes.select! {|s| (s < tx_size) & (s > 0)} #1.
128
145
  if randomly_sampled_size = specified_sizes.sample #2.
129
146
  sampled_queries = [items.sample(randomly_sampled_size)] #3.
130
147
  end
@@ -216,27 +233,53 @@ module Evoc
216
233
  break
217
234
  end
218
235
  end
236
+ # get query
237
+ query_hash = query.to_h
238
+ # convert query string to array of items
239
+ query_hash['query'] = query_hash['query'].split(',').map(&:to_i)
240
+ # verify query before executing
241
+ if tx = Evoc::HistoryStore.base_history.get_tx(id: query_hash['tx_id'],id_type: :id)
242
+ if !(query_hash['query'] - tx.items).empty?
243
+ raise Evoc::Exceptions::ConfigurationError.new "The query generated from #{query_hash['tx_id']} was not a subset of the same tx in the loaded history. The query was: '#{query_hash['query']}', the tx was '#{tx.items}'"
244
+ end
245
+ else
246
+ raise Evoc::Exceptions::ConfigurationError.new "Could not find the tx: '#{query_hash['tx_id']}' from #{self.opts[:queries]} in the history #{self.opts[:transactions]}"
247
+ end
248
+
219
249
  current_scenario = 1
250
+ last_error = 'no errors'
220
251
  # - compact removes nil values (not used factors)
221
252
  # - the splat operator '*' turns the array into parameters for #product
222
253
  # - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
223
254
  factors.first.product(*factors[1..-1]).each do |scenario|
224
255
  # Print progress to stderr
225
- STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines} (scenarios skipped: #{invalid_configuration}) \r"
256
+ STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
257
+ if invalid_configuration > 0
258
+ STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
259
+ else
260
+ STDERR.print " \r"
261
+ end
226
262
 
227
- query_hash = query.to_h
228
- # convert query to array
229
- query_hash['query'] = query_hash['query'].split(',')
230
263
  params = query_hash.merge(scenario.to_h)
231
264
  params[:case_id] = self.opts[:case_id]
232
265
  params[:granularity] = self.opts[:granularity]
233
266
  # initialize scenario
234
- q = Evoc::Scenario.new(params)
267
+ s = Evoc::Scenario.new(params)
235
268
  begin
236
- result = q.call(evaluators: self.opts[:evaluators]).to_json
237
- $stdout.puts result
269
+ Evoc::RecommendationCache.get_recommendation(algorithm: s.algorithm,
270
+ query: s.query,
271
+ model_start: s.model_start,
272
+ model_end: s.model_end,
273
+ max_size: s.max_size,
274
+ aggregator: s.aggregator,
275
+ measures: s.measures)
276
+ Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], top_k: self.opts[:top_k], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
277
+ result = Evoc::RecommendationCache.to_h(measures: s.measures)
278
+ # merge scenario params with result hash and dump as json
279
+ $stdout.puts s.to_h.merge(result).to_json
238
280
  rescue ArgumentError => e
239
281
  invalid_configuration += 1
282
+ last_error = e.message
240
283
  end
241
284
  current_scenario += 1
242
285
  end
@@ -9,7 +9,7 @@ module Evoc
9
9
  # time: the time it took to generate the currently cached recommendation
10
10
  # model_size: the number of transactions used when generating the currently cached recommendation
11
11
  class << self
12
- attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :time_evaluation, :filtered_model_size, :evaluation
12
+ attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
13
13
  end
14
14
 
15
15
  def self.recommendation_cached?(algorithm:,
@@ -20,6 +20,7 @@ module Evoc
20
20
  return self.tag == [algorithm,query,model_start,model_end,max_size].hash
21
21
  end
22
22
 
23
+
23
24
  def self.get_recommendation(algorithm:,
24
25
  query:,
25
26
  model_start:,
@@ -75,14 +76,13 @@ module Evoc
75
76
  # @param [Array<String>] measure_combinations the list of measures to use when sorting a recommendation before evaluating
76
77
  #
77
78
  # @return [Hash[aggregator][evaluator][result]] the hash of results
78
- def self.evaluate(evaluators: ,expected_outcome:,measure_combination: )
79
+ def self.evaluate_last(evaluators: ,top_k: nil, unique_consequents: nil,expected_outcome:,measure_combination: )
79
80
  if !self.last_recommendation.nil?
80
- t1 = Time.new
81
- evaluators.each do |evaluator|
82
- self.evaluation[evaluator] = self.last_recommendation.evaluate_with(evaluator: evaluator,expected_outcome: expected_outcome,measure_combination: measure_combination)
83
- end
84
- t2 = Time.new
85
- self.time_evaluation = TimeDifference.between(t1,t2).in_seconds.round(8)
81
+ self.evaluation = self.last_recommendation.evaluate_with(evaluators: evaluators,
82
+ top_k: top_k,
83
+ unique_consequents: unique_consequents,
84
+ expected_outcome: expected_outcome,
85
+ measure_combination: measure_combination)
86
86
  else
87
87
  STDERR.puts "TAG = #{self.tag}No recommendation to evaluate"
88
88
  end
@@ -94,10 +94,7 @@ module Evoc
94
94
  # time: 'execution time',
95
95
  # filtered_model_size:
96
96
  # number_of_rules :
97
- # evaluation: {
98
- # average_precision: ..,
99
- # ..next evaluator..
100
- # }
97
+ # average_precision:
101
98
  # rules: [
102
99
  # {
103
100
  # lhs: [lhs]
@@ -118,7 +115,6 @@ module Evoc
118
115
  recommendation_hash[:time_rulegeneration] = self.time_rulegeneration
119
116
  recommendation_hash[:time_measurecalculation] = self.time_measurecalculation
120
117
  recommendation_hash[:time_aggregation] = self.time_aggregation
121
- recommendation_hash[:time_evaluation] = self.time_evaluation
122
118
  recommendation_hash[:filtered_model_size] = self.filtered_model_size
123
119
  recommendation_hash[:number_of_baserules] = self.base_recommendation.size
124
120
  recommendation_hash[:number_of_rules] = self.last_recommendation.size
@@ -128,10 +124,11 @@ module Evoc
128
124
  sum + r.get_measure('m_hyper_coefficient').value } / self.last_recommendation.size
129
125
  recommendation_hash[:largest_antecedent] = self.last_recommendation.largest_antecedent
130
126
  if !self.evaluation.nil?
131
- self.evaluation.each do |evaluator,value|
132
- recommendation_hash[evaluator.to_sym] = value[:value]
133
- recommendation_hash[:unique_consequents] = value[:unique_consequents]
134
- end
127
+ self.evaluation.each do |evaluator,results|
128
+ recommendation_hash[evaluator] = results['value']
129
+ # time can also be added like this:
130
+ # recommendation_hash[evaluator+'_time'] = results['time']
131
+ end
135
132
  end
136
133
  recommendation_hash[:rules] = []
137
134
  self.last_recommendation.each do |rule|
data/lib/evoc/rule.rb CHANGED
@@ -21,15 +21,19 @@ module Evoc
21
21
  name
22
22
  end
23
23
 
24
+ def human_name
25
+ "#{human_lhs} -> #{human_rhs}"
26
+ end
27
+
24
28
  def human_lhs
25
- if !self.tx_store.nil? & self.lhs.all? {|i| i.is_a?(Numeric)}
26
- self.lhs.map{|i| self.tx_store.int_2_name[i]}
29
+ if !self.tx_store.nil? # & self.lhs.all? {|i| i.is_a?(Numeric)}
30
+ self.tx_store.ints2names(self.lhs.map(&:to_i)).join(',')
27
31
  end
28
32
  end
29
33
 
30
34
  def human_rhs
31
- if !self.tx_store.nil? & self.rhs.all? {|i| i.is_a?(Numeric)}
32
- self.rhs.map{|i| self.tx_store.int_2_name[i]}
35
+ if !self.tx_store.nil? #& self.rhs.all? {|i| i.is_a?(Numeric)}
36
+ self.tx_store.ints2names(self.rhs.map(&:to_i)).join(',')
33
37
  end
34
38
  end
35
39
 
@@ -116,6 +116,33 @@ module Evoc
116
116
  end
117
117
 
118
118
 
119
+ # Needed by Evaluate mixin
120
+ def evaluation_format(measures:, expected_outcome:)
121
+ current_weight = nil
122
+ current_group = []
123
+ recommendation = []
124
+ # sort and filter out duplicate consequents
125
+ self.sort_on(measures: measures, rules: self.unique_by(measures.first)).each do |r|
126
+ expected = ((r.rhs - expected_outcome).empty? ? 1 : 0)
127
+ weight_tag = measures.map {|m| r.get_measure(m).value.nil? ? "INF" : r.get_measure(m).to_s}.join('_')
128
+ if current_weight.nil?
129
+ current_weight = weight_tag
130
+ end
131
+ if weight_tag == current_weight
132
+ current_group << expected
133
+ else
134
+ recommendation << current_group
135
+ current_group = [expected]
136
+ current_weight = weight_tag
137
+ end
138
+ end
139
+ # add last group if not empty
140
+ if !current_group.empty?
141
+ recommendation << current_group
142
+ end
143
+ return recommendation
144
+ end
145
+
119
146
  ##
120
147
  # Evaluate this recommendation using the given evaluator
121
148
  #
@@ -126,27 +153,35 @@ module Evoc
126
153
  # @param [String] evaluator the method to use for evaluating
127
154
  # @param [Array] expected_outcome the list of items to evaluate against
128
155
  # @param [Array] measure_combination the list of measures used to first sort the recommendation
129
- def evaluate_with(evaluator: :average_precision,expected_outcome:,measure_combination: )
156
+ def evaluate_with(evaluators:,expected_outcome:,measure_combination:,top_k: nil,unique_consequents: nil)
130
157
  if measure_combination.empty? then raise ArgumentError, "Cannot evalute a recommendation without specifying which measures to rank on" end
131
- result = Hash.new
132
- logger.debug "#{__method__} params: evaluator: #{evaluator}, measure_combination: #{measure_combination}"
158
+ logger.debug "#{__method__} params: evaluators: #{evaluators}, measure_combination: #{measure_combination}"
133
159
  # sort the rules on each combination and evaluate
134
- sorted_rules = []
135
- if self.aggregator =~ /_hc\z/
136
- sorted_rules = self.sort_on(measures: measure_combination+['m_hyper_coefficient'])
137
- elsif !self.aggregator.nil?
138
- sorted_rules = self.sort_on(measures: measure_combination)
139
- else
140
- # not an aggregation
141
- # get the strongest unique rules
142
- unique_rules = self.unique_by(measure_combination.first)
143
- sorted_rules = self.sort_on(rules: unique_rules,measures: measure_combination)
144
- end
145
- # get the recommended items
146
- recommendation = sorted_rules.map(&:rhs)
160
+ # if !top_k.nil?
161
+ # raise ArgumentError, "Top K must be a number" unless top_k.is_a?(Numeric)
162
+ # sorted_rules = sorted_rules.take(top_k)
163
+ # end
164
+ # convert rules into format used in evaluation
165
+ # map to 0/1 list where 1 is a correct item and 0 is not
166
+ # second item in each tuple gives the weight of the rule
147
167
  # evaluate the sorted list against the expected outcome
148
- result[:value] = Evoc::Evaluate.execute(recommendation,expected_outcome,evaluator)
149
- return result
168
+ recommendation = self.evaluation_format(measures: measure_combination, expected_outcome: expected_outcome)
169
+ potential_params = {rec: recommendation, exp: expected_outcome.size, rules: self}
170
+ results = Hash.new
171
+ evaluators.each do |evaluator|
172
+ t1 = Time.new
173
+ if Evoc::Evaluate.respond_to?(evaluator)
174
+ results[evaluator] = Hash.new
175
+ method_params = Evoc::Evaluate.method(evaluator).parameters.map(&:second)
176
+ params = potential_params.select {|k,v| method_params.include?(k)}
177
+ results[evaluator]['value'] = Evoc::Evaluate.method(evaluator).call(params)
178
+ else
179
+ raise NoMethodError, "The evaluator you requested (#{evaluator}) has not been implemented in Evoc::Evaluate"
180
+ end
181
+ t2 = Time.new
182
+ results[evaluator]['time'] = TimeDifference.between(t1,t2).in_seconds.round(8)
183
+ end
184
+ return results
150
185
  end
151
186
 
152
187
  ##
@@ -163,9 +198,9 @@ module Evoc
163
198
  # where each consequent is the strongest given by the input measure
164
199
  #
165
200
  # @param: [String] measure the measure used to find the strongest rules
166
- def unique_by(measure)
201
+ def unique_by(measure, rules: self)
167
202
  selected_rules = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new
168
- self.each do |rule|
203
+ rules.each do |rule|
169
204
  if !rule.get_measure(measure).value.nil?
170
205
  key = rule.rhs.first
171
206
  if selected_rules[key].nil?
@@ -223,7 +258,7 @@ module Evoc
223
258
  csv << ['rule'] + defined_measures
224
259
  self.each do |rule|
225
260
  row = CSV::Row.new([],[],false)
226
- row << rule.name
261
+ row << rule.human_name
227
262
  defined_measures.each do |m|
228
263
  row << rule.get_measure(m).value
229
264
  end
@@ -287,11 +322,9 @@ module Evoc
287
322
  end
288
323
 
289
324
  def ==other
290
- ( self.map(&:lhs) == other.map(&:lhs) ) &
291
- ( self.map(&:rhs) == other.map(&:rhs) ) &
292
- Evoc::Rule.measures.each do |m|
293
- self.map {|r| r.get_measure(m)} == other.map {|r| r.get_measure(m)}
294
- end
325
+ self_rules = self.rules.sort_by {|r| r.name}.map {|r| "#{r.name}#{r.instantiated_measures.map {|m| r.get_measure(m).value}}"}
326
+ other_rules = other.rules.sort_by {|r| r.name}.map {|r| "#{r.name}#{r.instantiated_measures.map {|m| r.get_measure(m).value}}"}
327
+ self_rules == other_rules
295
328
  end
296
329
 
297
330
  def size
data/lib/evoc/scenario.rb CHANGED
@@ -53,30 +53,6 @@ module Evoc
53
53
  comparison
54
54
  end
55
55
 
56
- ##
57
- # Executes a query given the current paramaters
58
- # This results in a set of association rules, i.e., a recommendation
59
- #
60
- # Producing a recommendation is done through the following process:
61
- #
62
- # 1. Generate rules using a mining algorithm on the specified history
63
- # 2. Calculate interestingness measures on the generated rules
64
- # (optional) 3. Aggregate rules to further improve recommendation
65
- # (optional) 4. Evaluate how good the recommendation is
66
- #
67
- # @return [Hash] containing the query + scenario + recommendation + other metadata
68
- def call(evaluators: [])
69
- #generate recommendation in cache (generate rules + measures on rules)
70
- self.recommendation
71
-
72
- # evaluate if requested
73
- if !evaluators.empty?
74
- Evoc::RecommendationCache.evaluate(evaluators: evaluators,expected_outcome: self.expected_outcome,measure_combination: self.measures)
75
- end
76
- # build return hash
77
- recommendation = Evoc::RecommendationCache.to_h(measures: self.measures)
78
- return self.to_h.merge(recommendation)
79
- end
80
56
 
81
57
  def to_h
82
58
  fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
@@ -88,16 +64,6 @@ module Evoc
88
64
  return hash
89
65
  end
90
66
 
91
- def recommendation
92
- Evoc::RecommendationCache.get_recommendation(algorithm: self.algorithm,
93
- query: self.query,
94
- model_start: self.model_start,
95
- model_end: self.model_end,
96
- max_size: self.max_size,
97
- aggregator: self.aggregator,
98
- measures: self.measures)
99
- end
100
-
101
67
  def recommendation?
102
68
  Evoc::RecommendationCache.recommendation_cached?(algorithm: self.algorithm,
103
69
  query: self.query,
@@ -0,0 +1,26 @@
1
+ # Extending the rubytree gem with some additional methods
2
+ # see: http://rubytree.anupamsg.me/
3
module Tree
  class TreeNode

    ##
    # Walks next_sibling until the last sibling is reached.
    #
    # @return [Array] the right siblings of the current node, nearest first
    def right_siblings
      collected = []
      node = self
      until node.is_last_sibling?
        node = node.next_sibling
        collected << node
      end
      collected
    end

    ##
    # Walks previous_sibling until the first sibling is reached.
    #
    # @return [Array] the left siblings of the current node, nearest first
    def left_siblings
      collected = []
      node = self
      until node.is_first_sibling?
        node = node.previous_sibling
        collected << node
      end
      collected
    end
  end
end
data/lib/evoc/tx_store.rb CHANGED
@@ -26,6 +26,14 @@ module Evoc
26
26
  end
27
27
 
28
28
 
29
##
# Translates item names through the name_2_int dictionary.
#
# @param [Array] names the item names to look up
# @return [Array] whatever name_2_int yields for each name, in input order
def names2ints(names)
  dictionary = name_2_int
  names.map { |item_name| dictionary[item_name] }
end
32
+
33
##
# Translates integer ids through the int_2_name dictionary.
#
# @param [Array] ints the item ids to look up
# @return [Array] whatever int_2_name yields for each id, in input order
def ints2names(ints)
  dictionary = int_2_name
  ints.map { |item_id| dictionary[item_id] }
end
36
+
29
37
  ##
30
38
  # self << tx
31
39
  #
data/lib/evoc/util.rb ADDED
@@ -0,0 +1,37 @@
1
+
2
module Evoc
  # Small helpers for hand-writing test fixtures.
  #
  # Both helpers take "nodes" of the form
  #   [['a',[1,2]],['b',[2,3]],['c',[1,2,3]]]
  # where the first element is the item name and the second element is
  # the list of txes where this item changes.
  module Util
    ##
    # Prints a lattice so we can easily come up with tests for the
    # closed rules mining. One output line is printed per itemset size;
    # every itemset that occurs in at least one tx is printed as
    # "items:frequency", right-aligned in a column of nodes.size*2 characters.
    #
    # @param [Array<Array>] nodes the [name, txes] pairs
    # @param [Regexp, nil] filter only itemsets whose comma-joined name
    #   matches are printed; nil (the default) prints every itemset
    # @return [Range] the range of itemset sizes that was iterated
    def self.lattice(nodes, filter: nil)
      width = nodes.size * 2
      (1..nodes.size).each do |n|
        nodes.combination(n).each do |comb|
          # e.g. comb = [['a',[1,2]],['b',[2,3]]]
          union = comb.map { |name, _txes| name }.join(',')
          frequency = comb.map { |_name, txes| txes }.inject(&:&).size
          # a nil filter means "no filtering" (nil =~ str never matches)
          next unless filter.nil? || filter =~ union
          next unless frequency > 0

          printf("%#{width}s", [union, frequency].join(':'))
        end
        puts
      end
    end

    ##
    # Generates a tx store from nodes (same structure as used for
    # lattice creation). One Evoc::Tx is added per distinct tx id,
    # holding the names of all items that list that id.
    #
    # @param [Array<Array>] nodes the [name, txes] pairs
    # @return [Evoc::TxStore] the populated store (empty for empty nodes)
    def self.nodes2txstore(nodes)
      store = Evoc::TxStore.new
      # distinct tx ids in order of first appearance; safe for empty input
      nodes.flat_map { |_name, txes| txes }.uniq.each do |id|
        items = nodes.select { |_name, txes| txes.include?(id) }.map { |name, _txes| name }
        store << Evoc::Tx.new(id: id, items: items)
      end
      store
    end
  end
end
data/lib/evoc/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Evoc
2
- VERSION = "3.6.2"
2
+ VERSION = "3.7.0"
3
3
  end
@@ -1,10 +1,20 @@
1
1
  require_relative 'cli_helper'
2
+ # override printing of help text as the default does not respect spaces and adds newlines
3
class Thor
  module Shell
    class Basic
      ##
      # Writes the help text to stdout exactly as given, so spacing and
      # line breaks in long descriptions are kept.
      #
      # @param [String] message the text to print
      # @param [Hash] options accepted to keep the overridden signature, not used
      def print_wrapped(message, options = {})
        stdout.puts(message)
      end
    end
  end
end
2
12
 
3
13
  module EvocCLI
4
14
  class Experiment < Thor
5
15
  class_option :case_id, type: :string, desc: "Specify case identifier."
6
16
  class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
7
- class_option :transactions, :aliases => '-t', :type => :string, :required => true, :desc => "Path to change-history"
17
+ class_option :transactions, :aliases => '-t', :type => :string, :desc => "Path to change-history"
8
18
  class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
9
19
 
10
20
  ##
@@ -35,6 +45,7 @@ module EvocCLI
35
45
  desc: "Percentage of items to select for each query"
36
46
  method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
37
47
  method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
48
+ method_option :write_dict, type: :string, desc: "Write an item dictionary to the provided file"
38
49
  desc "generate_queries [options]", "Generate queries from <transactions>"
39
50
  def generate_queries
40
51
  #MemoryProfiler.start('create_queries',30)
@@ -59,7 +70,52 @@ module EvocCLI
59
70
  desc: "DEPRECATED WILL HAVE NO EFFECT Number of query permutations/replications to produce."
60
71
  method_option :fail_safe, type: :string, desc: "If the fail safe file exists, safely exit."
61
72
  method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
73
+ method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
74
+ method_option :top_k, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER an evential unique consequents filter"
62
75
  desc "execute_scenarios [options]",""
76
+ long_desc <<-LONGDESC
77
+ keyword description
78
+ ------- -----------
79
+
80
+ case_id: user provided tag for the history used
81
+ granularity: granularity of the history used
82
+ scenario_id: a unique identifier for this scenario
83
+ tx_id: the sha of the commit that the query was sampled from
84
+ tx_index: the index of this transaction in the used history (0 is oldest)
85
+ tx_size: the number of items in the transaction
86
+ query_size: the number of items in the query
87
+ query_percentage: query_size/tx_size
88
+ expected_outcome_size: tx - query
89
+ model_size: number of previous transactions relative to this one
90
+ model_hours: time span from the first transaction to this one
91
+ model_age: number of transactions between end of model and this transaction
92
+ max_size: transactions larger than this are filtered out before generating rules
93
+ filtered_model_size: model size after the max_size filtering
94
+ algorithm: the mining algorithm used to generate the recommendation
95
+ aggregator: the aggregation function used to aggregate the rules of the recommendation
96
+ measures: the interestingness measures used to rank each rule
97
+ recommendation_tag: a unique identifier of the rules used as a basis for the recommendation
98
+ time_rulegeneration: how long it took to generate the rules
99
+ time_measurecalculation: how long it took to calculate the measures for each rule
100
+ time_aggregation: how long it took to aggregate the rules
101
+ number_of_baserules: number of rules before aggregation
102
+ number_of_rules: number of rules after aggregation (equal to number_of_baserules when not aggregating)
103
+ number_of_hyperrules: number of hyper rules after aggregating
104
+ mean_hyper_coefficient: average number of rules aggregated in each hyper rule
105
+ largest_antecedent: number of items in the largest antecedent (lhs of rule)
106
+ t_ap: average precision where ties are accounted for
107
+ ap: the average precision
108
+ precision: ratio of correct to incorrect items
109
+ precision10: ratio of correct to incorrect items in the top 10
110
+ recall: ratio of correct items in recommendation to full set of expected items
111
+ recall10: ratio of correct items in recommendation to full set of expected items in the top 10
112
+ mean_confidence: the average confidence of the rules in this recommendation
113
+ discernibility: the number of uniquely weighted rules to the number of rules
114
+ applicable: 1 if rules were generated, 0 otherwise
115
+ f1: the f1 measure
116
+ first_relevant: the rank of the first correct item
117
+ last_relevant: the rank of the last correct item
118
+ LONGDESC
63
119
  def execute_scenarios
64
120
  if !options[:permutation].nil?
65
121
  STDERR.puts "Permutation option has been set, but the option is currently disabled and will have no effect"
data/lib/evoc_helper.rb CHANGED
@@ -28,7 +28,10 @@ require 'logger'
28
28
  require 'zip'
29
29
  require 'zip/filesystem'
30
30
  require 'set'
31
+ require 'tempfile'
32
+ require 'fileutils'
31
33
  require 'algorithms' # various efficient data structures
34
+ require 'tree' #general purpose tree structure
32
35
  require 'mathn' # enhances the Rational (and others) number type
33
36
  Evoc::Env.load('google_hash',"please install to improve performance")
34
37
  #Evoc::Env.load('nmatrix')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: evoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.6.2
4
+ version: 3.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Rolfsnes
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-12-20 00:00:00.000000000 Z
11
+ date: 2017-01-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -150,6 +150,20 @@ dependencies:
150
150
  - - ">="
151
151
  - !ruby/object:Gem::Version
152
152
  version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rubytree
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
153
167
  description:
154
168
  email:
155
169
  - mail@thomasrolfsnes.com
@@ -172,12 +186,15 @@ files:
172
186
  - evoc.gemspec
173
187
  - lib/evoc.rb
174
188
  - lib/evoc/algorithm.rb
189
+ - lib/evoc/algorithms/closed_rules.rb
175
190
  - lib/evoc/algorithms/top_k.rb
176
191
  - lib/evoc/analyze.rb
177
192
  - lib/evoc/array.rb
178
193
  - lib/evoc/evaluate.rb
179
194
  - lib/evoc/exceptions/aggregation_error.rb
195
+ - lib/evoc/exceptions/configuration_error.rb
180
196
  - lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb
197
+ - lib/evoc/exceptions/format_error.rb
181
198
  - lib/evoc/exceptions/measure_calculation_error.rb
182
199
  - lib/evoc/exceptions/no_changed_items_in_changes.rb
183
200
  - lib/evoc/exceptions/no_changes_in_json_object.rb
@@ -206,8 +223,10 @@ files:
206
223
  - lib/evoc/rule_store.rb
207
224
  - lib/evoc/scenario.rb
208
225
  - lib/evoc/svd.rb
226
+ - lib/evoc/tree/tree_node.rb
209
227
  - lib/evoc/tx.rb
210
228
  - lib/evoc/tx_store.rb
229
+ - lib/evoc/util.rb
211
230
  - lib/evoc/version.rb
212
231
  - lib/evoc_cli/analyze.rb
213
232
  - lib/evoc_cli/cli_helper.rb