evoc 3.9.1 → 3.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d556543c7f271c2c9c6a18ffb508b5f167b64629
4
- data.tar.gz: 62a6606b2096cf0d25a522cc27766f9bac752bcf
3
+ metadata.gz: debd6763073d247f88a9ba0b80f252b8f31c5a30
4
+ data.tar.gz: b21e8c6dfd037bc87ec631aff4ff998591347a10
5
5
  SHA512:
6
- metadata.gz: b73b5929392077c66b2b6a21e50abcb76715a8173cee0958a8238e93d8d3c86deb92bded7162158015b0cb1b38485d17194d629d3afa315af99800eff16ae55c
7
- data.tar.gz: 8882a45dab133b128ca7901d3924a46b716205af42fd6997bd8482bcc735bd63a869d4a55509f559ff53b69c62acb93ec8a62cebd09350696ce0e6f7aee20c38
6
+ metadata.gz: 66d8e086a2992bf24a50b786989e7d4a8a1cb1e170f2d94a97f0f8438860f5f31b8abdea7c301422fa83a020009f909e88e181a8391f6b4341c40737ecf7b0ea
7
+ data.tar.gz: c0dfb6534e75d4663284610e1c3b3198f3be91491203b4238130e23b53f1da00287f0ec9447005710d30f8fcb3dafb79ff1be286f817b7610ec574c70b8a2d3a
@@ -1,59 +1,51 @@
1
1
  module Evoc
2
2
  class ClosedRules
3
- extend Logging
4
-
5
3
  def self.closed_rules(tx_store:,query:)
6
4
  # @@store = tx_store
7
5
  # create initial trees, one tree per consequent
8
6
  tree = self.initialize_tree(tx_store,query)
9
- logger.debug "INIT TREE:"
10
- tree.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}"})
7
+ # puts "INIT TREE:"
8
+ # tree.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}"})
11
9
  closed_rules = Evoc::RuleStore.new(query: query)
12
- # tree.children.each do |consequent|
13
- self.extend_nodes(tree).each do |frequency, closed_sets|
10
+ tree.children.each do |consequent|
11
+ self.extend_nodes(consequent).each do |frequency, closed_sets|
14
12
  closed_sets.each do |closed_set|
15
13
  antecedent = closed_set - consequent.name
16
14
  closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
17
15
  end
18
16
  end
19
- # end
17
+ end
20
18
  return closed_rules
21
19
  end
22
20
 
23
21
  private
24
22
  def self.initialize_tree(tx_store, query)
25
- rules = Evoc::Algorithm.co_change(tx_store: tx_store, query: query)
26
23
  tree = Tree::TreeNode.new([])
27
- rules.each do |rule|
28
- txes_union = tx_store.transactions_of(rule.lhs.first) & tx_store.transactions_of(rule.rhs.first)
29
- union = [rule.lhs.first.to_s,rule.rhs.first.to_s]
30
- tree << Tree::TreeNode.new(union,txes_union)
24
+ # find all items that changed with something in the query
25
+ query_changed_in = tx_store.transactions_of_list(query)
26
+ # store all items from the query that have changed with each consequent
27
+ query_changed_in.each do |tx_id|
28
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
29
+ antecedent = (query & tx.items)
30
+ consequents = (tx.items - antecedent)
31
+ if consequents.size != 0
32
+ consequents.each do |consequent|
33
+ consequent_key = [consequent.to_s]
34
+ if tree[consequent_key].nil?
35
+ # initialize candidates
36
+ tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
37
+ end
38
+ txes_consequent = tree[consequent_key].content
39
+ antecedent.each do |item|
40
+ union = [item.to_s,consequent.to_s]
41
+ if tree[consequent_key][union].nil?
42
+ txes_union = tx_store.transactions_of(item) & txes_consequent
43
+ tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
44
+ end
45
+ end
46
+ end
47
+ end
31
48
  end
32
- # # find all items that changed with something in the query
33
- # query_changed_in = tx_store.transactions_of_list(query)
34
- # # store all items from the query that have changed with each consequent
35
- # query_changed_in.each do |tx_id|
36
- # tx = tx_store.get_tx(id:tx_id,id_type: :index)
37
- # antecedent = (query & tx.items)
38
- # consequents = (tx.items - antecedent)
39
- # if consequents.size != 0
40
- # consequents.each do |consequent|
41
- # consequent_key = [consequent.to_s]
42
- # if tree[consequent_key].nil?
43
- # # initialize candidates
44
- # tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
45
- # end
46
- # txes_consequent = tree[consequent_key].content
47
- # antecedent.each do |item|
48
- # union = [item.to_s,consequent.to_s]
49
- # if tree[consequent_key][union].nil?
50
- # txes_union = tx_store.transactions_of(item) & txes_consequent
51
- # tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
52
- # end
53
- # end
54
- # end
55
- # end
56
- # end
57
49
  return(tree)
58
50
  end
59
51
 
@@ -63,7 +55,7 @@ module Evoc
63
55
  a = current_node
64
56
  b = a.next_sibling
65
57
  while(!b.nil?) do
66
- logger.debug "Checking #{a.name}:{#{a.content}} against #{b.name}:{#{b.content}}"
58
+ # print "Checking #{@@store.ints2names(a.name.map(&:to_i))}:{#{a.content}} against #{@@store.ints2names(b.name.map(&:to_i))}:{#{b.content}}"
67
59
  ab = a.name | b.name
68
60
  a_txes = a.content
69
61
  b_txes = b.content
@@ -75,53 +67,54 @@ module Evoc
75
67
  if ab_txes.size > 0
76
68
  case self.compare(a_txes,b_txes)
77
69
  when 'EQUAL'
78
- logger.debug " EQUAL"
79
- logger.debug " removing #{b.name}"
80
- logger.debug " renaming #{a.name} to #{ab}"
70
+ # puts " EQUAL"
71
+ # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
72
+ # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
81
73
  temp = b.previous_sibling
82
74
  root.remove!(b)
83
75
  b = temp
84
76
  a.each {|n| n.rename(ab | n.name)}
85
77
  when 'A_IN_B'
86
- logger.debug " A in B"
87
- logger.debug " renaming #{a.name} to #{ab}"
78
+ # puts " A in B"
79
+ # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
88
80
  a.each {|n| n.rename(ab | n.name)}
89
81
  when 'B_IN_A'
90
- logger.debug " B in A"
91
- logger.debug " removing #{b.name}"
92
- logger.debug " adding child #{ab} to #{a.name}"
82
+ # puts " B in A"
83
+ # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
84
+ # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
93
85
  temp = b.previous_sibling
94
86
  root.remove!(b)
95
87
  b = temp
96
88
  a << Tree::TreeNode.new(ab,ab_txes)
97
89
  when 'NOT_EQUAL'
98
- logger.debug " NOT EQUAL"
99
- logger.debug " adding child #{ab} to #{a.name}"
90
+ # puts " NOT EQUAL"
91
+ # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
100
92
  a << Tree::TreeNode.new(ab,ab_txes)
101
93
  end
102
94
  end
103
- logger.debug "NEW TREE:"
104
- root.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}:#{node.content.size}"})
95
+ # puts "NEW TREE:"
96
+ # root.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}:#{node.content.size}"})
105
97
  b = b.next_sibling
106
- logger.debug "A next sibling #{b}}"
107
- logger.debug "A:#{a.name}, B:#{b.nil? ? nil : b.name}"
98
+ # puts "A siblings #{a.right_siblings.map(&:name).map {|n| @@store.ints2names(n.map(&:to_i))}}"
99
+ # puts "A next sibling #{@@store.ints2names(a.next_sibling.name.map(&:to_i))}}"
100
+ # puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
108
101
  end # siblings.each
109
102
  if !a.children.empty?
110
- logger.debug "TRAVERSING DOWN"
103
+ # puts "TRAVERSING DOWN"
111
104
  self.extend_nodes(a, closed_rules: closed_rules)
112
105
  end
113
106
  # add node as closed rule if not subsumed by another rule already added
114
107
  rule_frequency = a.content.size
115
108
  rule = a.name
116
109
  if closed_rules[rule_frequency].nil?
117
- logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
110
+ # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
118
111
  closed_rules[rule_frequency] = [rule]
119
112
  else
120
113
  if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
121
- logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
114
+ # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
122
115
  closed_rules[rule_frequency] << rule
123
116
  else
124
- logger.debug "RULE SUBSUMED, NOT ADDING: #{rule}:#{rule_frequency}"
117
+ # puts "RULE SUBSUMED, NOT ADDING: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
125
118
  end
126
119
  end
127
120
  current_node = current_node.next_sibling
@@ -28,6 +28,35 @@ module Evoc
28
28
  CSV {|row| row << results}
29
29
  end
30
30
 
31
+ def evolution
32
+ CSV {|row| row << %w(index relevant_index overlap)}
33
+ self.tx_store.each do |tx|
34
+ changed_in = self.tx_store.transactions_of_list(tx.items, strict: false, identifier: :index)
35
+ previous_txes = changed_in.select {|i| i <= tx.index}
36
+ previous_txes.each do |prev_index|
37
+ prev_tx = self.tx_store.get_tx(id: prev_index,id_type: :index)
38
+ overlap = ((prev_tx.items & tx.items).size/tx.size.to_f).round(2)
39
+ CSV {|row| row << [tx.index,prev_index,overlap]}
40
+ end
41
+ end
42
+ end
43
+
44
+ def commits
45
+ unique_items = Set.new
46
+ changes_so_far = 0
47
+ self.tx_store.each do |tx|
48
+ data = Hash.new
49
+ tx.items.each {|item| unique_items << item}
50
+ changes_so_far = changes_so_far += tx.items.size
51
+ data['sha'] = tx.id
52
+ data['index'] = tx.index
53
+ data['num_changes'] = tx.items.size
54
+ data['items_touched_so_far'] = unique_items.size
55
+ data['moving_average'] = (changes_so_far/(tx.index+1)).to_f.round(2)
56
+ STDOUT.puts data.to_json
57
+ end
58
+ end
59
+
31
60
  def avg_method_changes_per_parsable_file
32
61
  parsable_files_changed = 0
33
62
  method_changes = 0
@@ -1,37 +1,49 @@
1
1
  class Array
2
- def subset?(other)
3
- self & other == self
4
- end
5
-
6
- def include_any?(other)
7
- (self & other).size > 0
8
- end
9
-
10
- ##
11
- # returns the union of an array of arrays
12
- def array_union
13
- if union = self.inject(:|)
14
- return union
15
- else
16
- return []
17
- end
18
- end
19
-
20
- ##
21
- # returns the intersection of a list of lists
22
- def array_intersection
23
- if intersection = self.inject(:&)
24
- return intersection
25
- else
26
- return []
27
- end
28
- end
29
-
30
- ##
31
- # returns the list of items in self that was not in other
32
- def array_difference(other)
33
- self.map {|a| a - other}.array_union
34
- end
2
+
3
+ def mean
4
+ self.inject(0) { |sum, x| sum += x } / self.size.to_f
5
+ end
6
+
7
+ def median(already_sorted=false)
8
+ return nil if self.empty?
9
+ array = (already_sorted ? self : self.sort)
10
+ m_pos = array.size / 2
11
+ return array.size % 2 == 1 ? array[m_pos] : array[m_pos-1..m_pos].mean
12
+ end
13
+
14
+ def subset?(other)
15
+ self & other == self
16
+ end
17
+
18
+ def include_any?(other)
19
+ (self & other).size > 0
20
+ end
21
+
22
+ ##
23
+ # returns the union of an array of arrays
24
+ def array_union
25
+ if union = self.inject(:|)
26
+ return union
27
+ else
28
+ return []
29
+ end
30
+ end
31
+
32
+ ##
33
+ # returns the intersection of a list of lists
34
+ def array_intersection
35
+ if intersection = self.inject(:&)
36
+ return intersection
37
+ else
38
+ return []
39
+ end
40
+ end
41
+
42
+ ##
43
+ # returns the list of items in self that was not in other
44
+ def array_difference(other)
45
+ self.map {|a| a - other}.array_union
46
+ end
35
47
 
36
48
  def self.powerset(set)
37
49
  return [set] if set.empty?
@@ -11,6 +11,14 @@ module Evoc
11
11
  end
12
12
  end
13
13
 
14
+ def self.mean_support(rules:)
15
+ if rules.empty? then return nil end
16
+ return (rules.inject(0) {|sum,r| sum + r.m_support.value}/rules.size).to_f
17
+ end
18
+
19
+ def self.mean_support10(rules:)
20
+ return self.mean_support(rules: Evoc::RuleStore.sort_on(rules: rules,measures: ['m_support']).take(10).flatten.take(10))
21
+ end
14
22
 
15
23
  def self.mean_confidence(rules:)
16
24
  if rules.empty? then return nil end
@@ -17,6 +17,8 @@ module Evoc
17
17
  end
18
18
 
19
19
  def sample_transactions
20
+ # initialize a random number generator with fixed seed
21
+ rand = Random.new(self.opts[:seed])
20
22
  # by default we can sample from the whole history
21
23
  sampling_history = Evoc::HistoryStore.base_history
22
24
  STDERR.puts "Sampling transactions from a pool of #{sampling_history.size}.."
@@ -74,7 +76,7 @@ module Evoc
74
76
  tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
75
77
  tx_sizes_to_sample_from.each do |group_size|
76
78
  if group_size == '*'
77
- sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size])
79
+ sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size], random: rand)
78
80
  sample << sampled_ids
79
81
  STDERR.puts "Sampled #{sampled_ids.size} txes"
80
82
  # remove sampled txes from sampling_history
@@ -87,7 +89,7 @@ module Evoc
87
89
  if group.size < self.opts[:sample_size]
88
90
  logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
89
91
  end
90
- sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
92
+ sampled_ids = group.sample(self.opts[:sample_size], random: rand).map(&:id)
91
93
  sample << sampled_ids
92
94
  STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
93
95
  else
@@ -109,6 +111,8 @@ module Evoc
109
111
  # tx_id, query
110
112
  #
111
113
  def generate_queries
114
+ # initialize a random number generator with fixed seed
115
+ rand = Random.new(self.opts[:seed])
112
116
  ##
113
117
  # write dict
114
118
  ##
@@ -154,15 +158,15 @@ module Evoc
154
158
  specified_sizes.uniq!
155
159
 
156
160
  random_sizes = []
157
- if self.opts[:random_select] then random_sizes << Random.new.rand(1..(tx_size-1)) end
161
+ if self.opts[:random_select] then random_sizes << Random.new.rand(self.opts[:minimum_query_size]..(tx_size-1)) end
158
162
 
159
163
  sampled_queries = []
160
164
  # only specified sizes
161
165
  if random_sizes.empty? & !specified_sizes.empty?
162
- sampled_queries = specified_sizes.map {|s| items.sample(s)}
166
+ sampled_queries = specified_sizes.map {|s| items.sample(s, random: rand)}
163
167
  # only random sizes
164
168
  elsif !random_sizes.empty? & specified_sizes.empty?
165
- sampled_queries = random_sizes.map {|s| items.sample(s)}
169
+ sampled_queries = random_sizes.map {|s| items.sample(s, random: rand)}
166
170
  # random + specified = randomly sample in range defined by specified
167
171
  # ex:
168
172
  # specified = [1,3,10,20]
@@ -172,9 +176,9 @@ module Evoc
172
176
  # 2. randomly select X in specified = Y
173
177
  # 3. randomly select Y in tx
174
178
  elsif !random_sizes.empty? & !specified_sizes.empty?
175
- specified_sizes.select! {|s| (s < tx_size) & (s > 0)} #1.
176
- if randomly_sampled_size = specified_sizes.sample #2.
177
- sampled_queries = [items.sample(randomly_sampled_size)] #3.
179
+ specified_sizes.select! {|s| (s < tx_size) & (s >= self.opts[:minimum_query_size])} #1.
180
+ if randomly_sampled_size = specified_sizes.sample(random: rand) #2.
181
+ sampled_queries = [items.sample(randomly_sampled_size, random: rand)] #3.
178
182
  end
179
183
  end
180
184
 
@@ -189,6 +193,9 @@ module Evoc
189
193
  logger.debug "The size of the sampled query was equal to the size of the transaction, skipping.. Tx ID: #{tx_id}. Query size: #{query.size}"
190
194
  next
191
195
  end
196
+ if query.size < self.opts[:minimum_query_size]
197
+ next
198
+ end
192
199
  CSV {|row| row << [tx_id,query.join(',')]}
193
200
  end
194
201
  else
@@ -231,8 +238,6 @@ module Evoc
231
238
  factor_max_size = self.opts[:max_size].nil? ? nil : self.opts[:max_size].map {|s| [ 'max_size',s ]}
232
239
  # Factor: Model age aka number of commits between query and last tx in history
233
240
  factor_model_age = self.opts[:model_age].nil? ? nil : self.opts[:model_age].map {|s| [ 'model_age',s ]}
234
- # Factor: Algorithm
235
- factor_algorithms = self.opts[:algorithms].nil? ? nil : self.opts[:algorithms].map {|a| ['algorithm',a]}
236
241
  # Factor: Measures
237
242
  factor_measures = self.opts[:measures].map {|c| ['measures',c]}
238
243
  # Factor: Aggregator
@@ -253,7 +258,8 @@ module Evoc
253
258
  num_lines = File.read(self.opts[:queries]).each_line.count-1
254
259
  current_line = 1
255
260
 
256
- factors = [factor_model_size,factor_max_size,factor_model_age,factor_algorithms,factor_measures,factor_permutation,factor_aggregators].compact
261
+ # compact removes nil values (not used factors)
262
+ factors = [factor_model_size,factor_max_size,factor_model_age,factor_measures,factor_permutation,factor_aggregators].compact
257
263
  num_of_scenarios = factors.inject(1) {|product,f| product * f.size}
258
264
  invalid_configuration = 0
259
265
  last_error = 'no errors'
@@ -280,38 +286,53 @@ module Evoc
280
286
  end
281
287
 
282
288
  current_scenario = 1
283
- # - compact removes nil values (not used factors)
284
289
  # - the splat operator '*' turns the array into parameters for #product
285
290
  # - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
286
291
  factors.first.product(*factors[1..-1]).each do |scenario|
287
- # Print progress to stderr
288
- STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
289
- if invalid_configuration > 0
290
- STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
291
- else
292
- STDERR.print " \r"
293
- end
294
-
295
292
  params = query_hash.merge(scenario.to_h)
296
293
  params[:case_id] = self.opts[:case_id]
297
294
  params[:granularity] = self.opts[:granularity]
298
295
  # initialize scenario
299
296
  s = Evoc::Scenario.new(params)
300
- begin
301
- Evoc::RecommendationCache.get_recommendation(algorithm: s.algorithm,
302
- query: s.query,
303
- model_start: s.model_start,
304
- model_end: s.model_end,
305
- max_size: s.max_size,
306
- aggregator: s.aggregator,
307
- measures: s.measures)
308
- Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], topk: self.opts[:topk], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
309
- result = Evoc::RecommendationCache.to_h(measures: s.measures)
310
- # merge scenario params with result hash and dump as json
311
- $stdout.puts s.to_h.merge({topk: self.opts[:topk],date: tx.date}).merge(result).to_json
312
- rescue ArgumentError => e
313
- invalid_configuration += 1
314
- last_error = e.message
297
+ scenario_stats = {}
298
+ if self.opts[:stats]
299
+ scenario_stats = s.stats
300
+ end
301
+ # Factor: Algorithm
302
+ self.opts[:algorithms].each do |algorithm|
303
+ s.algorithm = algorithm
304
+ # Print progress to stderr
305
+ STDERR.print "(#{self.opts[:case_id]}) Executing #{algorithm} on scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
306
+ if invalid_configuration > 0
307
+ STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
308
+ else
309
+ STDERR.print " \r"
310
+ end
311
+
312
+ begin
313
+ Evoc::RecommendationCache.get_recommendation(algorithm: algorithm,
314
+ query: s.query,
315
+ model_start: s.model_start,
316
+ model_end: s.model_end,
317
+ max_size: s.max_size,
318
+ aggregator: s.aggregator,
319
+ measures: s.measures)
320
+ Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators],
321
+ topk: self.opts[:topk],
322
+ unique_consequents: self.opts[:unique_consequents],
323
+ expected_outcome: s.expected_outcome,
324
+ measure_combination: s.measures)
325
+
326
+ # build json line by merging hashes
327
+ $stdout.puts s.to_h
328
+ .merge(scenario_stats)
329
+ .merge({topk: self.opts[:topk], date: tx.date})
330
+ .merge(Evoc::RecommendationCache.to_h(measures: s.measures))
331
+ .to_json
332
+ rescue ArgumentError => e
333
+ invalid_configuration += 1
334
+ last_error = e.message
335
+ end
315
336
  end
316
337
  current_scenario += 1
317
338
  end
@@ -12,14 +12,22 @@ module Evoc
12
12
  attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
13
13
  end
14
14
 
15
- def self.recommendation_cached?(algorithm:, query:, model_start:, model_end:, max_size: nil)
15
+ def self.recommendation_cached?(algorithm:,
16
+ query:,
17
+ model_start:,
18
+ model_end:,
19
+ max_size: nil)
16
20
  return self.tag == [algorithm,query,model_start,model_end,max_size].hash
17
21
  end
18
22
 
19
23
 
20
- ##
21
- # @param scenario <Evoc::Scenario> the scenario to cache a new recommendation for
22
- def self.get_recommendation(algorithm:, query:, model_start:, model_end:, max_size: nil, aggregator: nil, measures: [])
24
+ def self.get_recommendation(algorithm:,
25
+ query:,
26
+ model_start:,
27
+ model_end:,
28
+ max_size: nil,
29
+ aggregator: nil,
30
+ measures: [])
23
31
  # check if a new base recommendation needs to be generated
24
32
  tag = [algorithm,query,model_start,model_end,max_size].hash
25
33
  if self.tag != tag
@@ -16,6 +16,7 @@ module Evoc
16
16
  :model_size,
17
17
  :model_age,
18
18
  :max_size,
19
+ :stats,
19
20
  :opts
20
21
 
21
22
  def initialize(opts = Hash.new)
@@ -23,7 +24,7 @@ module Evoc
23
24
  self.opts = opts
24
25
  self.scenario_id = opts.hash
25
26
 
26
- # model_size depends on model_age, so set model_age first
27
+ self.tx_id = opts[:tx_id]
27
28
  self.model_age = opts[:model_age]
28
29
  opts.each do |attribute,value|
29
30
  self.send("#{attribute}=", value)
@@ -55,11 +56,17 @@ module Evoc
55
56
 
56
57
 
57
58
  def to_h
58
- fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
59
+ fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures stats)
59
60
  hash = Hash.new
60
61
  fields.each do |key|
61
62
  value = self.method(key).call
62
- hash[key] = value.is_a?(Array) ? value.join(',') : value
63
+ if value.is_a?(Array)
64
+ hash[key] = value.join(',')
65
+ elsif value.is_a?(Hash)
66
+ hash.merge!(value)
67
+ else
68
+ hash[key] = value
69
+ end
63
70
  end
64
71
  return hash
65
72
  end
@@ -228,5 +235,65 @@ module Evoc
228
235
  def tx_size
229
236
  self.tx.size
230
237
  end
238
+
239
+ def stats
240
+ time_start = Time.now
241
+ history = Evoc::HistoryStore.get_history(self.model_start,self.model_end,self.max_size)
242
+ relevant_transactions = Set.new
243
+ relevant_items = Set.new
244
+ relevant_ages = []
245
+ avg_age_of_relevant_transactions = 0
246
+ avg_size_of_relevant_transactions = 0
247
+ files_changed = Set.new
248
+ num_methods_changed = 0
249
+ num_new_items = 0
250
+ # @avg_size_of_relevant_transactions = 0
251
+ # mean_age_of_relevant
252
+ # median_age_of_relevant
253
+ # ratio_new_items
254
+ self.query.each do |item|
255
+ if change = history.int_2_name[item]
256
+ change = change.split(':')
257
+ files_changed << change[0]
258
+ if change.size > 1
259
+ num_methods_changed = num_methods_changed + 1
260
+ end
261
+ indexes_of_previous_changes = history.transactions_of(item, identifier: :index)
262
+ if new_item = (indexes_of_previous_changes.size == 0)
263
+ num_new_items = num_new_items + 1
264
+ else
265
+ indexes_of_previous_changes.each do |tx_index|
266
+ relevant_so_far = relevant_transactions.size
267
+ relevant_transactions << tx_index
268
+ new_relevant = (relevant_transactions.size > relevant_so_far)
269
+ if new_relevant
270
+ age = (self.tx_index - tx_index)
271
+ relevant_ages << age
272
+ tx = history.get_tx(id: tx_index,id_type: :index)
273
+ avg_size_of_relevant_transactions = avg_size_of_relevant_transactions + tx.size
274
+ avg_age_of_relevant_transactions = avg_age_of_relevant_transactions + age
275
+ end
276
+ end
277
+ end
278
+ else
279
+ num_new_items = num_new_items + 1
280
+ end
281
+ end
282
+
283
+ num_relevant_transactions = relevant_transactions.size
284
+
285
+ time_end = Time.now
286
+ time_generate_stats = TimeDifference.between(time_start,time_end).in_seconds.round(8)
287
+ {time_generate_stats: time_generate_stats,
288
+ num_files_changed: files_changed.size,
289
+ num_methods_changed: num_methods_changed,
290
+ num_new_items: num_new_items,
291
+ num_relevant_transactions: num_relevant_transactions,
292
+ median_age_of_relevant_transactions: relevant_ages.median,
293
+ avg_age_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_age_of_relevant_transactions/num_relevant_transactions).to_f,
294
+ avg_size_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_size_of_relevant_transactions/num_relevant_transactions).to_f}
295
+ end
296
+
297
+
231
298
  end
232
299
  end
@@ -1,3 +1,3 @@
1
1
  module Evoc
2
- VERSION = "3.9.1"
2
+ VERSION = "3.10.0"
3
3
  end
@@ -6,6 +6,17 @@ module EvocCLI
6
6
  class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
7
7
  class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
8
8
 
9
+ desc "evolution","Outputs where the items of all transactions previously changed."
10
+ def evolution
11
+ a = Evoc::Analyze.new(options)
12
+ a.evolution
13
+ end
14
+
15
+ desc "commits","Outputs the items touched so far and the moving average of items per transactions for every transaction in the history"
16
+ def commits
17
+ a = Evoc::Analyze.new(options)
18
+ a.commits
19
+ end
9
20
 
10
21
  method_option :number, aliases: '-n', type: :numeric, default: 1000, desc: "The number of rules to calculate measures for"
11
22
  desc "measure_values","Empirically investigate the range of interestingness measures"
@@ -19,6 +19,7 @@ module EvocCLI
19
19
 
20
20
  ##
21
21
  # sample_transactions
22
+ method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
22
23
  method_option :sample_groups, aliases: "-g", type: :array, required: true, desc: "What tx size groups to sample from"
23
24
  method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
24
25
  method_option :recent, type: :numeric, desc: "Filter to the X most recent transactions"
@@ -38,6 +39,7 @@ module EvocCLI
38
39
  ##
39
40
  # generate_queries
40
41
  #
42
+ method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
41
43
  method_option :transaction_ids_path, :aliases => '-i', :type => :string, :required => true, :desc => "Path to file with \\n separated list of transaction ids"
42
44
  method_option :random_select, type: :boolean, default: false, desc: "Randomly select a query from each given transaction"
43
45
  method_option :select, aliases: '-s', type: :array, default: [],
@@ -46,6 +48,7 @@ module EvocCLI
46
48
  desc: "Reverse version of --select (select \"all but\" X)"
47
49
  method_option :percentage, aliases: '-e', type: :array,
48
50
  desc: "Percentage of items to select for each query"
51
+ method_option :minimum_query_size, type: :numeric, default: 1, desc: "Only sample queries of at least this size"
49
52
  method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
50
53
  method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
51
54
  method_option :write_dict, type: :string, desc: "Write an item dictionary to the provided file"
@@ -75,6 +78,7 @@ module EvocCLI
75
78
  method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
76
79
  method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
77
80
  method_option :topk, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER any consequent filter"
81
+ method_option :stats, type: :boolean, required: false, desc: "Generate extra stats describing each scenario"
78
82
  desc "execute_scenarios [options]",""
79
83
  def execute_scenarios
80
84
  if !options[:permutation].nil?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: evoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.9.1
4
+ version: 3.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Rolfsnes
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-09 00:00:00.000000000 Z
11
+ date: 2017-03-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler