evoc 3.7.0 → 3.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2b867c7a5e05b3c2be58b9dd361554a43f3f277c
4
- data.tar.gz: 9d00091a6fd7685f048930889aaf252df5aa6634
3
+ metadata.gz: d6583ca91c4e4987cd4816958b375795d5adfc9a
4
+ data.tar.gz: 8569042becd54ef9ebd4f2d94aac3fb106b9fb6d
5
5
  SHA512:
6
- metadata.gz: f89dbef20f735e0f8c6b8f7104ea8b38a4dd118707089b17acdf529591fd652478406047cb5d4b08e18afb746d3edf46dc7cf9ac7eb208d214f91e9050841c12
7
- data.tar.gz: 7e1955af5653df5d7afd986e9d522178b32d10c26771833e4d37aec910dd93208de56c8a6f83bd21faeb8796b098da808f72a05c9f6e3caf1100064fd20d7bb0
6
+ metadata.gz: 845bccb16af58856641c86224d03dd677c2d10cc69c545db2a5f9695765df3db1717a8b1d76e02c59d060eaa0a2290c1abe18bbd40980908d4db1faa4997306b
7
+ data.tar.gz: cb229b3e0cfea199d63929131258acc75a40d7ca872067740641bf324ec8c4d466fb16569131983a8f472e9962b5898c65ab731dfe4082be318645d56dba77f8
data/.gitignore CHANGED
@@ -15,3 +15,5 @@ evoc.log
15
15
  *TAGS
16
16
  *~
17
17
  *#
18
+ /spec/test_data/temp/*
19
+ /GPATH
@@ -105,36 +105,6 @@ module Evoc
105
105
  end
106
106
 
107
107
 
108
- def self.not_subsumed(tx_store:, query:)
109
- #initial filter, we consider all txes where something in the query changed
110
- query_changed_in = tx_store.transactions_of_list(query)
111
- # now find what subsets of the query changed in each tx
112
- trie = Containers::Trie.new
113
- query_changed_in.each do |tx_id|
114
- tx = tx_store.get_tx(id:tx_id,id_type: :index)
115
- antecedent = (query & tx.items)
116
- consequents = (tx.items - antecedent)
117
- if consequents.size != 0
118
- consequents.each do |consequent|
119
- entry = "#{consequent.to_s}#{antecedent.join('')}"
120
- if trie.get(entry).nil?
121
- puts "ADDED #{entry}"
122
- trie.push(entry,consequent.to_s)
123
- end
124
- end
125
- end
126
- end
127
- return trie
128
- # now generate rules
129
- # rule_store = Evoc::RuleStore.new(query: query)
130
- # rules.each do |consequent,antecedents|
131
- # antecedents.each do |antecedent|
132
- # rule_store << Evoc::Rule.new(lhs: antecedent,rhs: consequent,tx_store:tx_store)
133
- # end
134
- # end
135
- # return rule_store
136
- end
137
-
138
108
  ##
139
109
  # Find the largest rules for each unique consequent
140
110
  def self.largest_rules(tx_store:,query:)
@@ -168,6 +138,57 @@ module Evoc
168
138
  return rule_store
169
139
  end
170
140
 
141
+ def self.hybrid(tx_store:,query:)
142
+ overlaps = self.largest_overlaps(tx_store: tx_store, query: query)
143
+ if overlaps.empty?
144
+ return Evoc::RuleStore.new(query: query) # no rules
145
+ else
146
+ if overlaps.size == 1
147
+ if overlaps.first.size == query.size # execute rose
148
+ return self.rose(tx_store: tx_store,query: query)
149
+ else
150
+ return self.tarmaq(0,tx_store: tx_store,query: overlaps.first) # execute tarmaq
151
+ end
152
+ else
153
+ store = Evoc::RuleStore.new(query: query)
154
+ overlaps.each do |overlap|
155
+ if overlap.size == 1 # execute co change
156
+ part_store = Evoc::Algorithm.co_change(tx_store: tx_store, query: overlap)
157
+ part_store.each {|r| store << r}
158
+ else # execute tarmaq
159
+ part_store = Evoc::Algorithm.tarmaq(0,tx_store: tx_store, query: overlap)
160
+ part_store.each {|r| store << r}
161
+ end
162
+ end
163
+ store.rules = store.select {|r| !query.include?(r.rhs.first)}
164
+ return store
165
+ end
166
+ end
167
+ end
168
+
169
+ def self.largest_overlaps(tx_store:,query:)
170
+ largest_match = []
171
+ #initial filter, we consider all txes where something in the query changed
172
+ query_changed_in = tx_store.transactions_of_list(query)
173
+ # now find what subsets of the query changed in each tx
174
+ query_changed_in.each do |tx_id|
175
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
176
+ largest_match_in_query = (query & tx.items)
177
+ match_size = largest_match_in_query.size
178
+ remainder_in_tx = tx.items - largest_match_in_query
179
+ if remainder_in_tx.size > 0
180
+ if match_size > largest_match.size
181
+ largest_match = largest_match_in_query
182
+ end
183
+ end
184
+ end
185
+ if largest_match.empty? #no more matches
186
+ return []
187
+ else
188
+ query_remainder = query - largest_match
189
+ return [largest_match] + self.largest_overlaps(tx_store: tx_store,query: query_remainder)
190
+ end
191
+ end
171
192
 
172
193
  ##
173
194
  # TARMAQ
@@ -206,7 +227,7 @@ module Evoc
206
227
  self.cached_rule_range(1,1,tx_store: tx_store, query: query)
207
228
  end
208
229
 
209
- def self.closed_rules(tx_store:, query:)
230
+ def self.tcharm(tx_store:, query:)
210
231
  Evoc::ClosedRules.closed_rules(tx_store: tx_store,query: query)
211
232
  end
212
233
 
data/lib/evoc/evaluate.rb CHANGED
@@ -11,11 +11,16 @@ module Evoc
11
11
  end
12
12
  end
13
13
 
14
+
14
15
  def self.mean_confidence(rules:)
15
16
  if rules.empty? then return nil end
16
17
  return (rules.inject(0) {|sum,r| sum + r.m_confidence.value}/rules.size).to_f
17
18
  end
18
19
 
20
+ def self.mean_confidence10(rules:)
21
+ return self.mean_confidence(rules: Evoc::RuleStore.sort_on(rules: rules,measures: ['m_confidence']).take(10).flatten.take(10))
22
+ end
23
+
19
24
  def self.discernibility(rec:)
20
25
  # AP is 0 for the empty list
21
26
  if rec.is_a?(Array) && rec.empty? # array and empty
@@ -65,6 +70,27 @@ module Evoc
65
70
  return (2*rec_correct/(rec_size + exp)).to_f
66
71
  end
67
72
 
73
+ ##
74
+ # @return an array containing the rank of each consecutive expected outcome
75
+ def self.relevant_ranks(rec:)
76
+ # an empty recommendation has no relevant items, so the list of ranks is empty
77
+ if rec.is_a?(Array) && rec.empty? # array and empty
78
+ return []
79
+ end
80
+ self.validateInput(rec)
81
+
82
+ ranks = []
83
+ last_checked = 1
84
+ rec.each do |c|
85
+ c.each do |e|
86
+ if e == 1
87
+ ranks << last_checked
88
+ end
89
+ last_checked = last_checked + 1
90
+ end
91
+ end
92
+ return ranks
93
+ end
68
94
 
69
95
  ##
70
96
  # @return the rank of the first relevant item
@@ -34,36 +34,42 @@ module Evoc
34
34
  end
35
35
  sampling_history = sampling_history.clone_with_subset(self.opts[:minimum_history],sampling_history.size-1)
36
36
  end
37
- # group the txes by size
38
- groups = sampling_history.group_by {|tx| tx.size}
39
- # sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
40
- # i.e., random sampling is done first, then the sampled txes are removed from the sampling
41
- tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
42
- tx_sizes_to_sample_from.each do |group_size|
43
- if group_size == '*'
44
- # TODO: > 2 should be generalized to > X
45
- txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}.map(&:id)
46
- sampled_ids = txes_larger_than_one.sample(self.opts[:sample_size])
47
- sample << sampled_ids
48
- STDERR.puts "Sampled #{sampled_ids.size} txes from the whole history"
49
- # remove sampled txes from sampling_history
50
- filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
51
- sampling_history.clear
52
- filtered_hist.each {|tx| sampling_history << tx}
53
- elsif group_size.to_i
54
- # check if there were any txes of this size
55
- if group = groups[group_size.to_i]
56
- if group.size < self.opts[:sample_size]
57
- logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
58
- end
59
- sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
37
+ if self.opts[:recent]
38
+ STDERR.puts "Taking the #{self.opts[:sample_size]} most recent transactions, this overrides any other sampling params apart from maximum_commit_size"
39
+ txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}
40
+ sample = txes_larger_than_one.sort_by {|tx| -tx.index}.take(self.opts[:sample_size]).map(&:id)
41
+ else
42
+ # group the txes by size
43
+ groups = sampling_history.group_by {|tx| tx.size}
44
+ # sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
45
+ # i.e., random sampling is done first, then the sampled txes are removed from the sampling
46
+ tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
47
+ tx_sizes_to_sample_from.each do |group_size|
48
+ if group_size == '*'
49
+ # TODO: > 2 should be generalized to > X
50
+ txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}.map(&:id)
51
+ sampled_ids = txes_larger_than_one.sample(self.opts[:sample_size])
60
52
  sample << sampled_ids
61
- STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
53
+ STDERR.puts "Sampled #{sampled_ids.size} txes from the whole history"
54
+ # remove sampled txes from sampling_history
55
+ filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
56
+ sampling_history.clear
57
+ filtered_hist.each {|tx| sampling_history << tx}
58
+ elsif group_size.to_i
59
+ # check if there were any txes of this size
60
+ if group = groups[group_size.to_i]
61
+ if group.size < self.opts[:sample_size]
62
+ logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
63
+ end
64
+ sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
65
+ sample << sampled_ids
66
+ STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
67
+ else
68
+ logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
69
+ end
62
70
  else
63
- logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
71
+ raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
64
72
  end
65
- else
66
- raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
67
73
  end
68
74
  end
69
75
  sample.flatten.uniq
@@ -238,6 +244,7 @@ module Evoc
238
244
  # convert query string to array of items
239
245
  query_hash['query'] = query_hash['query'].split(',').map(&:to_i)
240
246
  # verify query before executing
247
+ tx = nil
241
248
  if tx = Evoc::HistoryStore.base_history.get_tx(id: query_hash['tx_id'],id_type: :id)
242
249
  if !(query_hash['query'] - tx.items).empty?
243
250
  raise Evoc::Exceptions::ConfigurationError.new "The query generated from #{query_hash['tx_id']} was not a subset of the same tx in the loaded history. The query was: '#{query_hash['query']}', the tx was '#{tx.items}'"
@@ -273,10 +280,10 @@ module Evoc
273
280
  max_size: s.max_size,
274
281
  aggregator: s.aggregator,
275
282
  measures: s.measures)
276
- Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], top_k: self.opts[:top_k], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
283
+ Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], topk: self.opts[:topk], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
277
284
  result = Evoc::RecommendationCache.to_h(measures: s.measures)
278
285
  # merge scenario params with result hash and dump as json
279
- $stdout.puts s.to_h.merge(result).to_json
286
+ $stdout.puts s.to_h.merge(result).merge({topk: self.opts[:topk],date: tx.date}).to_json
280
287
  rescue ArgumentError => e
281
288
  invalid_configuration += 1
282
289
  last_error = e.message
@@ -12,22 +12,14 @@ module Evoc
12
12
  attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
13
13
  end
14
14
 
15
- def self.recommendation_cached?(algorithm:,
16
- query:,
17
- model_start:,
18
- model_end:,
19
- max_size: nil)
15
+ def self.recommendation_cached?(algorithm:, query:, model_start:, model_end:, max_size: nil)
20
16
  return self.tag == [algorithm,query,model_start,model_end,max_size].hash
21
17
  end
22
18
 
23
19
 
24
- def self.get_recommendation(algorithm:,
25
- query:,
26
- model_start:,
27
- model_end:,
28
- max_size: nil,
29
- aggregator: nil,
30
- measures: [])
20
+ ##
21
+ # @param algorithm, query, model_start, model_end, max_size the values hashed into the tag that identifies the cached base recommendation
22
+ def self.get_recommendation(algorithm:, query:, model_start:, model_end:, max_size: nil, aggregator: nil, measures: [])
31
23
  # check if a new base recommendation needs to be generated
32
24
  tag = [algorithm,query,model_start,model_end,max_size].hash
33
25
  if self.tag != tag
@@ -76,10 +68,10 @@ module Evoc
76
68
  # @param [Array<String>] measure_combinations the list of measures to use when sorting a recommendation before evaluating
77
69
  #
78
70
  # @return [Hash[aggregator][evaluator][result]] the hash of results
79
- def self.evaluate_last(evaluators: ,top_k: nil, unique_consequents: nil,expected_outcome:,measure_combination: )
71
+ def self.evaluate_last(evaluators: ,expected_outcome:,measure_combination:,topk: nil, unique_consequents: nil)
80
72
  if !self.last_recommendation.nil?
81
73
  self.evaluation = self.last_recommendation.evaluate_with(evaluators: evaluators,
82
- top_k: top_k,
74
+ topk: topk,
83
75
  unique_consequents: unique_consequents,
84
76
  expected_outcome: expected_outcome,
85
77
  measure_combination: measure_combination)
data/lib/evoc/rule.rb CHANGED
@@ -26,13 +26,13 @@ module Evoc
26
26
  end
27
27
 
28
28
  def human_lhs
29
- if !self.tx_store.nil? # & self.lhs.all? {|i| i.is_a?(Numeric)}
29
+ if !self.tx_store.nil?
30
30
  self.tx_store.ints2names(self.lhs.map(&:to_i)).join(',')
31
31
  end
32
32
  end
33
33
 
34
34
  def human_rhs
35
- if !self.tx_store.nil? #& self.rhs.all? {|i| i.is_a?(Numeric)}
35
+ if !self.tx_store.nil?
36
36
  self.tx_store.ints2names(self.rhs.map(&:to_i)).join(',')
37
37
  end
38
38
  end
@@ -117,12 +117,13 @@ module Evoc
117
117
 
118
118
 
119
119
  # Needed by Evaluate mixin
120
- def evaluation_format(measures:, expected_outcome:)
120
+ def evaluation_format(measures:, expected_outcome:,topk: nil)
121
121
  current_weight = nil
122
122
  current_group = []
123
123
  recommendation = []
124
+ topk = (topk.nil? ? self.size : topk)
124
125
  # sort and filter out duplicate consequents
125
- self.sort_on(measures: measures, rules: self.unique_by(measures.first)).each do |r|
126
+ self.sort_on(measures: measures, rules: self.unique_by(measures.first)).take(topk).each do |r|
126
127
  expected = ((r.rhs - expected_outcome).empty? ? 1 : 0)
127
128
  weight_tag = measures.map {|m| r.get_measure(m).value.nil? ? "INF" : r.get_measure(m).to_s}.join('_')
128
129
  if current_weight.nil?
@@ -153,19 +154,19 @@ module Evoc
153
154
  # @param [String] evaluator the method to use for evaluating
154
155
  # @param [Array] expected_outcome the list of items to evaluate against
155
156
  # @param [Array] measure_combination the list of measures used to first sort the recommendation
156
- def evaluate_with(evaluators:,expected_outcome:,measure_combination:,top_k: nil,unique_consequents: nil)
157
+ def evaluate_with(evaluators:,expected_outcome:,measure_combination:,topk: nil,unique_consequents: nil)
157
158
  if measure_combination.empty? then raise ArgumentError, "Cannot evalute a recommendation without specifying which measures to rank on" end
158
159
  logger.debug "#{__method__} params: evaluators: #{evaluators}, measure_combination: #{measure_combination}"
159
160
  # sort the rules on each combination and evaluate
160
- # if !top_k.nil?
161
- # raise ArgumentError, "Top K must be a number" unless top_k.is_a?(Numeric)
162
- # sorted_rules = sorted_rules.take(top_k)
161
+ # if !topk.nil?
162
+ # raise ArgumentError, "Top K must be a number" unless topk.is_a?(Numeric)
163
+ # sorted_rules = sorted_rules.take(topk)
163
164
  # end
164
165
  # convert rules into format used in evaluation
165
166
  # map to 0/1 list where 1 is a correct item and 0 is not
166
167
  # second item in each tuple gives the weight of the rule
167
168
  # evaluate the sorted list against the expected outcome
168
- recommendation = self.evaluation_format(measures: measure_combination, expected_outcome: expected_outcome)
169
+ recommendation = self.evaluation_format(measures: measure_combination, expected_outcome: expected_outcome, topk: topk)
169
170
  potential_params = {rec: recommendation, exp: expected_outcome.size, rules: self}
170
171
  results = Hash.new
171
172
  evaluators.each do |evaluator|
@@ -193,6 +194,10 @@ module Evoc
193
194
  rules.sort_by {|r| measures.map {|m| r.get_measure(m).value.nil? ? Float::INFINITY : -r.get_measure(m)}}
194
195
  end
195
196
 
197
+ def self.sort_on(rules:, measures:)
198
+ rules.sort_by {|r| measures.map {|m| r.get_measure(m).value.nil? ? Float::INFINITY : -r.get_measure(m)}}
199
+ end
200
+
196
201
  ##
197
202
  # returns the set of unique consequents
198
203
  # where each consequent is the strongest given by the input measure
@@ -258,7 +263,7 @@ module Evoc
258
263
  csv << ['rule'] + defined_measures
259
264
  self.each do |rule|
260
265
  row = CSV::Row.new([],[],false)
261
- row << rule.human_name
266
+ row << rule.name
262
267
  defined_measures.each do |m|
263
268
  row << rule.get_measure(m).value
264
269
  end
data/lib/evoc/scenario.rb CHANGED
@@ -228,43 +228,5 @@ module Evoc
228
228
  def tx_size
229
229
  self.tx.size
230
230
  end
231
-
232
- ##
233
- #
234
- def instance_values_for_csv
235
- dont_include = ['opts', 'logger','time','filtered_model_size']
236
- self.instance_values.delete_if {|k,v| dont_include.include?(k)}
237
- end
238
-
239
- ##
240
- # generate an array suitable for a csv header
241
- def csv_header
242
- query = self.instance_values_for_csv.keys
243
- rule_store = !self.recommendation? ? [] : self.recommendation.csv_header
244
- rule_store + query
245
- end
246
-
247
- ##
248
- # generate an array of the current values of <self>
249
- # converts any array values to a comma separated string representation
250
- def to_csv_row
251
- query = self.instance_values_for_csv.values.map {|val| val.is_a?(Array) ? val.join(',') : val}
252
- rule_store = !self.recommendation? ? [] : self.recommendation.to_csv_row
253
- rule_store + query
254
- end
255
-
256
-
257
-
258
- ##
259
- # Prints the rules to standard out
260
- # sorted by strength
261
- def print
262
- if !self.recommendation?
263
- $stdout.puts ""
264
- else
265
- self.recommendation.print(measures)
266
- end
267
- end
268
-
269
231
  end
270
232
  end
data/lib/evoc/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Evoc
2
- VERSION = "3.7.0"
2
+ VERSION = "3.8.0"
3
3
  end
@@ -23,6 +23,7 @@ module EvocCLI
23
23
  method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
24
24
  method_option :minimum_history, :aliases => '-m', type: :numeric, desc: "Filter out transactions which has less previous history than this"
25
25
  method_option :maximum_commit_size, type: :numeric, desc: "Filter out transactions which are larger than this before sampling"
26
+ method_option :recent, type: :boolean, desc: "If transactions should be the most recent"
26
27
  method_option :after, :aliases => '-a', :desc => "Only include commits after this date"
27
28
  method_option :before, :aliases => '-b', :desc => "Only include commits before this date"
28
29
  desc "sample_transactions [OPTIONS]","Make a sample of transactions (from JSON format)"
@@ -71,7 +72,7 @@ module EvocCLI
71
72
  method_option :fail_safe, type: :string, desc: "If the fail safe file exists, safely exit."
72
73
  method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
73
74
  method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
74
- method_option :top_k, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER an evential unique consequents filter"
75
+ method_option :topk, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER any consequent filter"
75
76
  desc "execute_scenarios [options]",""
76
77
  long_desc <<-LONGDESC
77
78
  keyword description
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: evoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.7.0
4
+ version: 3.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Rolfsnes
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-23 00:00:00.000000000 Z
11
+ date: 2017-02-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler