evoc 3.7.0 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2b867c7a5e05b3c2be58b9dd361554a43f3f277c
4
- data.tar.gz: 9d00091a6fd7685f048930889aaf252df5aa6634
3
+ metadata.gz: d6583ca91c4e4987cd4816958b375795d5adfc9a
4
+ data.tar.gz: 8569042becd54ef9ebd4f2d94aac3fb106b9fb6d
5
5
  SHA512:
6
- metadata.gz: f89dbef20f735e0f8c6b8f7104ea8b38a4dd118707089b17acdf529591fd652478406047cb5d4b08e18afb746d3edf46dc7cf9ac7eb208d214f91e9050841c12
7
- data.tar.gz: 7e1955af5653df5d7afd986e9d522178b32d10c26771833e4d37aec910dd93208de56c8a6f83bd21faeb8796b098da808f72a05c9f6e3caf1100064fd20d7bb0
6
+ metadata.gz: 845bccb16af58856641c86224d03dd677c2d10cc69c545db2a5f9695765df3db1717a8b1d76e02c59d060eaa0a2290c1abe18bbd40980908d4db1faa4997306b
7
+ data.tar.gz: cb229b3e0cfea199d63929131258acc75a40d7ca872067740641bf324ec8c4d466fb16569131983a8f472e9962b5898c65ab731dfe4082be318645d56dba77f8
data/.gitignore CHANGED
@@ -15,3 +15,5 @@ evoc.log
15
15
  *TAGS
16
16
  *~
17
17
  *#
18
+ /spec/test_data/temp/*
19
+ /GPATH
@@ -105,36 +105,6 @@ module Evoc
105
105
  end
106
106
 
107
107
 
108
- def self.not_subsumed(tx_store:, query:)
109
- #initial filter, we consider all txes where something in the query changed
110
- query_changed_in = tx_store.transactions_of_list(query)
111
- # now find what subsets of the query changed in each tx
112
- trie = Containers::Trie.new
113
- query_changed_in.each do |tx_id|
114
- tx = tx_store.get_tx(id:tx_id,id_type: :index)
115
- antecedent = (query & tx.items)
116
- consequents = (tx.items - antecedent)
117
- if consequents.size != 0
118
- consequents.each do |consequent|
119
- entry = "#{consequent.to_s}#{antecedent.join('')}"
120
- if trie.get(entry).nil?
121
- puts "ADDED #{entry}"
122
- trie.push(entry,consequent.to_s)
123
- end
124
- end
125
- end
126
- end
127
- return trie
128
- # now generate rules
129
- # rule_store = Evoc::RuleStore.new(query: query)
130
- # rules.each do |consequent,antecedents|
131
- # antecedents.each do |antecedent|
132
- # rule_store << Evoc::Rule.new(lhs: antecedent,rhs: consequent,tx_store:tx_store)
133
- # end
134
- # end
135
- # return rule_store
136
- end
137
-
138
108
  ##
139
109
  # Find the largest rules for each unique consequent
140
110
  def self.largest_rules(tx_store:,query:)
@@ -168,6 +138,57 @@ module Evoc
168
138
  return rule_store
169
139
  end
170
140
 
141
+ def self.hybrid(tx_store:,query:)
142
+ overlaps = self.largest_overlaps(tx_store: tx_store, query: query)
143
+ if overlaps.empty?
144
+ return Evoc::RuleStore.new(query: query) # no rules
145
+ else
146
+ if overlaps.size == 1
147
+ if overlaps.first.size == query.size # execute rose
148
+ return self.rose(tx_store: tx_store,query: query)
149
+ else
150
+ return self.tarmaq(0,tx_store: tx_store,query: overlaps.first) # execute tarmaq
151
+ end
152
+ else
153
+ store = Evoc::RuleStore.new(query: query)
154
+ overlaps.each do |overlap|
155
+ if overlap.size == 1 # execute co change
156
+ part_store = Evoc::Algorithm.co_change(tx_store: tx_store, query: overlap)
157
+ part_store.each {|r| store << r}
158
+ else # execute tarmaq
159
+ part_store = Evoc::Algorithm.tarmaq(0,tx_store: tx_store, query: overlap)
160
+ part_store.each {|r| store << r}
161
+ end
162
+ end
163
+ store.rules = store.select {|r| !query.include?(r.rhs.first)}
164
+ return store
165
+ end
166
+ end
167
+ end
168
+
169
+ def self.largest_overlaps(tx_store:,query:)
170
+ largest_match = []
171
+ #initial filter, we consider all txes where something in the query changed
172
+ query_changed_in = tx_store.transactions_of_list(query)
173
+ # now find what subsets of the query changed in each tx
174
+ query_changed_in.each do |tx_id|
175
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
176
+ largest_match_in_query = (query & tx.items)
177
+ match_size = largest_match_in_query.size
178
+ remainder_in_tx = tx.items - largest_match_in_query
179
+ if remainder_in_tx.size > 0
180
+ if match_size > largest_match.size
181
+ largest_match = largest_match_in_query
182
+ end
183
+ end
184
+ end
185
+ if largest_match.empty? #no more matches
186
+ return []
187
+ else
188
+ query_remainder = query - largest_match
189
+ return [largest_match] + self.largest_overlaps(tx_store: tx_store,query: query_remainder)
190
+ end
191
+ end
171
192
 
172
193
  ##
173
194
  # TARMAQ
@@ -206,7 +227,7 @@ module Evoc
206
227
  self.cached_rule_range(1,1,tx_store: tx_store, query: query)
207
228
  end
208
229
 
209
- def self.closed_rules(tx_store:, query:)
230
+ def self.tcharm(tx_store:, query:)
210
231
  Evoc::ClosedRules.closed_rules(tx_store: tx_store,query: query)
211
232
  end
212
233
 
data/lib/evoc/evaluate.rb CHANGED
@@ -11,11 +11,16 @@ module Evoc
11
11
  end
12
12
  end
13
13
 
14
+
14
15
  def self.mean_confidence(rules:)
15
16
  if rules.empty? then return nil end
16
17
  return (rules.inject(0) {|sum,r| sum + r.m_confidence.value}/rules.size).to_f
17
18
  end
18
19
 
20
+ def self.mean_confidence10(rules:)
21
+ return self.mean_confidence(rules: Evoc::RuleStore.sort_on(rules: rules,measures: ['m_confidence']).take(10).flatten.take(10))
22
+ end
23
+
19
24
  def self.discernibility(rec:)
20
25
  # AP is 0 for the empty list
21
26
  if rec.is_a?(Array) && rec.empty? # array and empty
@@ -65,6 +70,27 @@ module Evoc
65
70
  return (2*rec_correct/(rec_size + exp)).to_f
66
71
  end
67
72
 
73
+ ##
74
+ # @return an array containing the rank of each consecutive expected outcome
75
+ def self.relevant_ranks(rec:)
76
+ # there are no relevant ranks for the empty list
77
+ if rec.is_a?(Array) && rec.empty? # array and empty
78
+ return []
79
+ end
80
+ self.validateInput(rec)
81
+
82
+ ranks = []
83
+ last_checked = 1
84
+ rec.each do |c|
85
+ c.each do |e|
86
+ if e == 1
87
+ ranks << last_checked
88
+ end
89
+ last_checked = last_checked + 1
90
+ end
91
+ end
92
+ return ranks
93
+ end
68
94
 
69
95
  ##
70
96
  # @return the rank of the first relevant item
@@ -34,36 +34,42 @@ module Evoc
34
34
  end
35
35
  sampling_history = sampling_history.clone_with_subset(self.opts[:minimum_history],sampling_history.size-1)
36
36
  end
37
- # group the txes by size
38
- groups = sampling_history.group_by {|tx| tx.size}
39
- # sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
40
- # i.e., random sampling is done first, then the sampled txes are removed from the sampling
41
- tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
42
- tx_sizes_to_sample_from.each do |group_size|
43
- if group_size == '*'
44
- # TODO: > 2 should be generalized to > X
45
- txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}.map(&:id)
46
- sampled_ids = txes_larger_than_one.sample(self.opts[:sample_size])
47
- sample << sampled_ids
48
- STDERR.puts "Sampled #{sampled_ids.size} txes from the whole history"
49
- # remove sampled txes from sampling_history
50
- filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
51
- sampling_history.clear
52
- filtered_hist.each {|tx| sampling_history << tx}
53
- elsif group_size.to_i
54
- # check if there were any txes of this size
55
- if group = groups[group_size.to_i]
56
- if group.size < self.opts[:sample_size]
57
- logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
58
- end
59
- sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
37
+ if self.opts[:recent]
38
+ STDERR.puts "Taking the #{self.opts[:sample_size]} most recent transactions, this overrides any other sampling params apart from maximum_commit_size"
39
+ txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}
40
+ sample = txes_larger_than_one.sort_by {|tx| -tx.index}.take(self.opts[:sample_size]).map(&:id)
41
+ else
42
+ # group the txes by size
43
+ groups = sampling_history.group_by {|tx| tx.size}
44
+ # sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
45
+ # i.e., random sampling is done first, then the sampled txes are removed from the sampling
46
+ tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
47
+ tx_sizes_to_sample_from.each do |group_size|
48
+ if group_size == '*'
49
+ # TODO: > 2 should be generalized to > X
50
+ txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}.map(&:id)
51
+ sampled_ids = txes_larger_than_one.sample(self.opts[:sample_size])
60
52
  sample << sampled_ids
61
- STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
53
+ STDERR.puts "Sampled #{sampled_ids.size} txes from the whole history"
54
+ # remove sampled txes from sampling_history
55
+ filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
56
+ sampling_history.clear
57
+ filtered_hist.each {|tx| sampling_history << tx}
58
+ elsif group_size.to_i
59
+ # check if there were any txes of this size
60
+ if group = groups[group_size.to_i]
61
+ if group.size < self.opts[:sample_size]
62
+ logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
63
+ end
64
+ sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
65
+ sample << sampled_ids
66
+ STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
67
+ else
68
+ logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
69
+ end
62
70
  else
63
- logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
71
+ raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
64
72
  end
65
- else
66
- raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
67
73
  end
68
74
  end
69
75
  sample.flatten.uniq
@@ -238,6 +244,7 @@ module Evoc
238
244
  # convert query string to array of items
239
245
  query_hash['query'] = query_hash['query'].split(',').map(&:to_i)
240
246
  # verify query before executing
247
+ tx = nil
241
248
  if tx = Evoc::HistoryStore.base_history.get_tx(id: query_hash['tx_id'],id_type: :id)
242
249
  if !(query_hash['query'] - tx.items).empty?
243
250
  raise Evoc::Exceptions::ConfigurationError.new "The query generated from #{query_hash['tx_id']} was not a subset of the same tx in the loaded history. The query was: '#{query_hash['query']}', the tx was '#{tx.items}'"
@@ -273,10 +280,10 @@ module Evoc
273
280
  max_size: s.max_size,
274
281
  aggregator: s.aggregator,
275
282
  measures: s.measures)
276
- Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], top_k: self.opts[:top_k], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
283
+ Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], topk: self.opts[:topk], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
277
284
  result = Evoc::RecommendationCache.to_h(measures: s.measures)
278
285
  # merge scenario params with result hash and dump as json
279
- $stdout.puts s.to_h.merge(result).to_json
286
+ $stdout.puts s.to_h.merge(result).merge({topk: self.opts[:topk],date: tx.date}).to_json
280
287
  rescue ArgumentError => e
281
288
  invalid_configuration += 1
282
289
  last_error = e.message
@@ -12,22 +12,14 @@ module Evoc
12
12
  attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
13
13
  end
14
14
 
15
- def self.recommendation_cached?(algorithm:,
16
- query:,
17
- model_start:,
18
- model_end:,
19
- max_size: nil)
15
+ def self.recommendation_cached?(algorithm:, query:, model_start:, model_end:, max_size: nil)
20
16
  return self.tag == [algorithm,query,model_start,model_end,max_size].hash
21
17
  end
22
18
 
23
19
 
24
- def self.get_recommendation(algorithm:,
25
- query:,
26
- model_start:,
27
- model_end:,
28
- max_size: nil,
29
- aggregator: nil,
30
- measures: [])
20
+ ##
21
+ # @param scenario <Evoc::Scenario> the scenario to cache a new recommendation for
22
+ def self.get_recommendation(algorithm:, query:, model_start:, model_end:, max_size: nil, aggregator: nil, measures: [])
31
23
  # check if a new base recommendation needs to be generated
32
24
  tag = [algorithm,query,model_start,model_end,max_size].hash
33
25
  if self.tag != tag
@@ -76,10 +68,10 @@ module Evoc
76
68
  # @param [Array<String>] measure_combinations the list of measures to use when sorting a recommendation before evaluating
77
69
  #
78
70
  # @return [Hash[aggregator][evaluator][result]] the hash of results
79
- def self.evaluate_last(evaluators: ,top_k: nil, unique_consequents: nil,expected_outcome:,measure_combination: )
71
+ def self.evaluate_last(evaluators: ,expected_outcome:,measure_combination:,topk: nil, unique_consequents: nil)
80
72
  if !self.last_recommendation.nil?
81
73
  self.evaluation = self.last_recommendation.evaluate_with(evaluators: evaluators,
82
- top_k: top_k,
74
+ topk: topk,
83
75
  unique_consequents: unique_consequents,
84
76
  expected_outcome: expected_outcome,
85
77
  measure_combination: measure_combination)
data/lib/evoc/rule.rb CHANGED
@@ -26,13 +26,13 @@ module Evoc
26
26
  end
27
27
 
28
28
  def human_lhs
29
- if !self.tx_store.nil? # & self.lhs.all? {|i| i.is_a?(Numeric)}
29
+ if !self.tx_store.nil?
30
30
  self.tx_store.ints2names(self.lhs.map(&:to_i)).join(',')
31
31
  end
32
32
  end
33
33
 
34
34
  def human_rhs
35
- if !self.tx_store.nil? #& self.rhs.all? {|i| i.is_a?(Numeric)}
35
+ if !self.tx_store.nil?
36
36
  self.tx_store.ints2names(self.rhs.map(&:to_i)).join(',')
37
37
  end
38
38
  end
@@ -117,12 +117,13 @@ module Evoc
117
117
 
118
118
 
119
119
  # Needed by Evaluate mixin
120
- def evaluation_format(measures:, expected_outcome:)
120
+ def evaluation_format(measures:, expected_outcome:,topk: nil)
121
121
  current_weight = nil
122
122
  current_group = []
123
123
  recommendation = []
124
+ topk = (topk.nil? ? self.size : topk)
124
125
  # sort and filter out duplicate consequents
125
- self.sort_on(measures: measures, rules: self.unique_by(measures.first)).each do |r|
126
+ self.sort_on(measures: measures, rules: self.unique_by(measures.first)).take(topk).each do |r|
126
127
  expected = ((r.rhs - expected_outcome).empty? ? 1 : 0)
127
128
  weight_tag = measures.map {|m| r.get_measure(m).value.nil? ? "INF" : r.get_measure(m).to_s}.join('_')
128
129
  if current_weight.nil?
@@ -153,19 +154,19 @@ module Evoc
153
154
  # @param [String] evaluator the method to use for evaluating
154
155
  # @param [Array] expected_outcome the list of items to evaluate against
155
156
  # @param [Array] measure_combination the list of measures used to first sort the recommendation
156
- def evaluate_with(evaluators:,expected_outcome:,measure_combination:,top_k: nil,unique_consequents: nil)
157
+ def evaluate_with(evaluators:,expected_outcome:,measure_combination:,topk: nil,unique_consequents: nil)
157
158
  if measure_combination.empty? then raise ArgumentError, "Cannot evalute a recommendation without specifying which measures to rank on" end
158
159
  logger.debug "#{__method__} params: evaluators: #{evaluators}, measure_combination: #{measure_combination}"
159
160
  # sort the rules on each combination and evaluate
160
- # if !top_k.nil?
161
- # raise ArgumentError, "Top K must be a number" unless top_k.is_a?(Numeric)
162
- # sorted_rules = sorted_rules.take(top_k)
161
+ # if !topk.nil?
162
+ # raise ArgumentError, "Top K must be a number" unless topk.is_a?(Numeric)
163
+ # sorted_rules = sorted_rules.take(topk)
163
164
  # end
164
165
  # convert rules into format used in evaluation
165
166
  # map to 0/1 list where 1 is a correct item and 0 is not
166
167
  # second item in each tuple gives the weight of the rule
167
168
  # evaluate the sorted list against the expected outcome
168
- recommendation = self.evaluation_format(measures: measure_combination, expected_outcome: expected_outcome)
169
+ recommendation = self.evaluation_format(measures: measure_combination, expected_outcome: expected_outcome, topk: topk)
169
170
  potential_params = {rec: recommendation, exp: expected_outcome.size, rules: self}
170
171
  results = Hash.new
171
172
  evaluators.each do |evaluator|
@@ -193,6 +194,10 @@ module Evoc
193
194
  rules.sort_by {|r| measures.map {|m| r.get_measure(m).value.nil? ? Float::INFINITY : -r.get_measure(m)}}
194
195
  end
195
196
 
197
+ def self.sort_on(rules:, measures:)
198
+ rules.sort_by {|r| measures.map {|m| r.get_measure(m).value.nil? ? Float::INFINITY : -r.get_measure(m)}}
199
+ end
200
+
196
201
  ##
197
202
  # returns the set of unique consequents
198
203
  # where each consequent is the strongest given by the input measure
@@ -258,7 +263,7 @@ module Evoc
258
263
  csv << ['rule'] + defined_measures
259
264
  self.each do |rule|
260
265
  row = CSV::Row.new([],[],false)
261
- row << rule.human_name
266
+ row << rule.name
262
267
  defined_measures.each do |m|
263
268
  row << rule.get_measure(m).value
264
269
  end
data/lib/evoc/scenario.rb CHANGED
@@ -228,43 +228,5 @@ module Evoc
228
228
  def tx_size
229
229
  self.tx.size
230
230
  end
231
-
232
- ##
233
- #
234
- def instance_values_for_csv
235
- dont_include = ['opts', 'logger','time','filtered_model_size']
236
- self.instance_values.delete_if {|k,v| dont_include.include?(k)}
237
- end
238
-
239
- ##
240
- # generate an array suitable for a csv header
241
- def csv_header
242
- query = self.instance_values_for_csv.keys
243
- rule_store = !self.recommendation? ? [] : self.recommendation.csv_header
244
- rule_store + query
245
- end
246
-
247
- ##
248
- # generate an array of the current values of <self>
249
- # converts any array values to a comma separated string representation
250
- def to_csv_row
251
- query = self.instance_values_for_csv.values.map {|val| val.is_a?(Array) ? val.join(',') : val}
252
- rule_store = !self.recommendation? ? [] : self.recommendation.to_csv_row
253
- rule_store + query
254
- end
255
-
256
-
257
-
258
- ##
259
- # Prints the rules to standard out
260
- # sorted by strength
261
- def print
262
- if !self.recommendation?
263
- $stdout.puts ""
264
- else
265
- self.recommendation.print(measures)
266
- end
267
- end
268
-
269
231
  end
270
232
  end
data/lib/evoc/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Evoc
2
- VERSION = "3.7.0"
2
+ VERSION = "3.8.0"
3
3
  end
@@ -23,6 +23,7 @@ module EvocCLI
23
23
  method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
24
24
  method_option :minimum_history, :aliases => '-m', type: :numeric, desc: "Filter out transactions which has less previous history than this"
25
25
  method_option :maximum_commit_size, type: :numeric, desc: "Filter out transactions which are larger than this before sampling"
26
+ method_option :recent, type: :boolean, desc: "If transactions should be the most recent"
26
27
  method_option :after, :aliases => '-a', :desc => "Only include commits after this date"
27
28
  method_option :before, :aliases => '-b', :desc => "Only include commits before this date"
28
29
  desc "sample_transactions [OPTIONS]","Make a sample of transactions (from JSON format)"
@@ -71,7 +72,7 @@ module EvocCLI
71
72
  method_option :fail_safe, type: :string, desc: "If the fail safe file exists, safely exit."
72
73
  method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
73
74
  method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
74
- method_option :top_k, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER an evential unique consequents filter"
75
+ method_option :topk, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER any consequent filter"
75
76
  desc "execute_scenarios [options]",""
76
77
  long_desc <<-LONGDESC
77
78
  keyword description
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: evoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.7.0
4
+ version: 3.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Rolfsnes
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-23 00:00:00.000000000 Z
11
+ date: 2017-02-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler