evoc 3.9.1 → 3.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d556543c7f271c2c9c6a18ffb508b5f167b64629
4
- data.tar.gz: 62a6606b2096cf0d25a522cc27766f9bac752bcf
3
+ metadata.gz: debd6763073d247f88a9ba0b80f252b8f31c5a30
4
+ data.tar.gz: b21e8c6dfd037bc87ec631aff4ff998591347a10
5
5
  SHA512:
6
- metadata.gz: b73b5929392077c66b2b6a21e50abcb76715a8173cee0958a8238e93d8d3c86deb92bded7162158015b0cb1b38485d17194d629d3afa315af99800eff16ae55c
7
- data.tar.gz: 8882a45dab133b128ca7901d3924a46b716205af42fd6997bd8482bcc735bd63a869d4a55509f559ff53b69c62acb93ec8a62cebd09350696ce0e6f7aee20c38
6
+ metadata.gz: 66d8e086a2992bf24a50b786989e7d4a8a1cb1e170f2d94a97f0f8438860f5f31b8abdea7c301422fa83a020009f909e88e181a8391f6b4341c40737ecf7b0ea
7
+ data.tar.gz: c0dfb6534e75d4663284610e1c3b3198f3be91491203b4238130e23b53f1da00287f0ec9447005710d30f8fcb3dafb79ff1be286f817b7610ec574c70b8a2d3a
@@ -1,59 +1,51 @@
1
1
  module Evoc
2
2
  class ClosedRules
3
- extend Logging
4
-
5
3
  def self.closed_rules(tx_store:,query:)
6
4
  # @@store = tx_store
7
5
  # create initial trees, one tree per consequent
8
6
  tree = self.initialize_tree(tx_store,query)
9
- logger.debug "INIT TREE:"
10
- tree.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}"})
7
+ # puts "INIT TREE:"
8
+ # tree.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}"})
11
9
  closed_rules = Evoc::RuleStore.new(query: query)
12
- # tree.children.each do |consequent|
13
- self.extend_nodes(tree).each do |frequency, closed_sets|
10
+ tree.children.each do |consequent|
11
+ self.extend_nodes(consequent).each do |frequency, closed_sets|
14
12
  closed_sets.each do |closed_set|
15
13
  antecedent = closed_set - consequent.name
16
14
  closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
17
15
  end
18
16
  end
19
- # end
17
+ end
20
18
  return closed_rules
21
19
  end
22
20
 
23
21
  private
24
22
  def self.initialize_tree(tx_store, query)
25
- rules = Evoc::Algorithm.co_change(tx_store: tx_store, query: query)
26
23
  tree = Tree::TreeNode.new([])
27
- rules.each do |rule|
28
- txes_union = tx_store.transactions_of(rule.lhs.first) & tx_store.transactions_of(rule.rhs.first)
29
- union = [rule.lhs.first.to_s,rule.rhs.first.to_s]
30
- tree << Tree::TreeNode.new(union,txes_union)
24
+ # find all items that changed with something in the query
25
+ query_changed_in = tx_store.transactions_of_list(query)
26
+ # store all items from the query that have changed with each consequent
27
+ query_changed_in.each do |tx_id|
28
+ tx = tx_store.get_tx(id:tx_id,id_type: :index)
29
+ antecedent = (query & tx.items)
30
+ consequents = (tx.items - antecedent)
31
+ if consequents.size != 0
32
+ consequents.each do |consequent|
33
+ consequent_key = [consequent.to_s]
34
+ if tree[consequent_key].nil?
35
+ # initialize candidates
36
+ tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
37
+ end
38
+ txes_consequent = tree[consequent_key].content
39
+ antecedent.each do |item|
40
+ union = [item.to_s,consequent.to_s]
41
+ if tree[consequent_key][union].nil?
42
+ txes_union = tx_store.transactions_of(item) & txes_consequent
43
+ tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
44
+ end
45
+ end
46
+ end
47
+ end
31
48
  end
32
- # # find all items that changed with something in the query
33
- # query_changed_in = tx_store.transactions_of_list(query)
34
- # # store all items from the query that have changed with each consequent
35
- # query_changed_in.each do |tx_id|
36
- # tx = tx_store.get_tx(id:tx_id,id_type: :index)
37
- # antecedent = (query & tx.items)
38
- # consequents = (tx.items - antecedent)
39
- # if consequents.size != 0
40
- # consequents.each do |consequent|
41
- # consequent_key = [consequent.to_s]
42
- # if tree[consequent_key].nil?
43
- # # initialize candidates
44
- # tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
45
- # end
46
- # txes_consequent = tree[consequent_key].content
47
- # antecedent.each do |item|
48
- # union = [item.to_s,consequent.to_s]
49
- # if tree[consequent_key][union].nil?
50
- # txes_union = tx_store.transactions_of(item) & txes_consequent
51
- # tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
52
- # end
53
- # end
54
- # end
55
- # end
56
- # end
57
49
  return(tree)
58
50
  end
59
51
 
@@ -63,7 +55,7 @@ module Evoc
63
55
  a = current_node
64
56
  b = a.next_sibling
65
57
  while(!b.nil?) do
66
- logger.debug "Checking #{a.name}:{#{a.content}} against #{b.name}:{#{b.content}}"
58
+ # print "Checking #{@@store.ints2names(a.name.map(&:to_i))}:{#{a.content}} against #{@@store.ints2names(b.name.map(&:to_i))}:{#{b.content}}"
67
59
  ab = a.name | b.name
68
60
  a_txes = a.content
69
61
  b_txes = b.content
@@ -75,53 +67,54 @@ module Evoc
75
67
  if ab_txes.size > 0
76
68
  case self.compare(a_txes,b_txes)
77
69
  when 'EQUAL'
78
- logger.debug " EQUAL"
79
- logger.debug " removing #{b.name}"
80
- logger.debug " renaming #{a.name} to #{ab}"
70
+ # puts " EQUAL"
71
+ # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
72
+ # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
81
73
  temp = b.previous_sibling
82
74
  root.remove!(b)
83
75
  b = temp
84
76
  a.each {|n| n.rename(ab | n.name)}
85
77
  when 'A_IN_B'
86
- logger.debug " A in B"
87
- logger.debug " renaming #{a.name} to #{ab}"
78
+ # puts " A in B"
79
+ # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
88
80
  a.each {|n| n.rename(ab | n.name)}
89
81
  when 'B_IN_A'
90
- logger.debug " B in A"
91
- logger.debug " removing #{b.name}"
92
- logger.debug " adding child #{ab} to #{a.name}"
82
+ # puts " B in A"
83
+ # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
84
+ # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
93
85
  temp = b.previous_sibling
94
86
  root.remove!(b)
95
87
  b = temp
96
88
  a << Tree::TreeNode.new(ab,ab_txes)
97
89
  when 'NOT_EQUAL'
98
- logger.debug " NOT EQUAL"
99
- logger.debug " adding child #{ab} to #{a.name}"
90
+ # puts " NOT EQUAL"
91
+ # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
100
92
  a << Tree::TreeNode.new(ab,ab_txes)
101
93
  end
102
94
  end
103
- logger.debug "NEW TREE:"
104
- root.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}:#{node.content.size}"})
95
+ # puts "NEW TREE:"
96
+ # root.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}:#{node.content.size}"})
105
97
  b = b.next_sibling
106
- logger.debug "A next sibling #{b}}"
107
- logger.debug "A:#{a.name}, B:#{b.nil? ? nil : b.name}"
98
+ # puts "A siblings #{a.right_siblings.map(&:name).map {|n| @@store.ints2names(n.map(&:to_i))}}"
99
+ # puts "A next sibling #{@@store.ints2names(a.next_sibling.name.map(&:to_i))}}"
100
+ # puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
108
101
  end # siblings.each
109
102
  if !a.children.empty?
110
- logger.debug "TRAVERSING DOWN"
103
+ # puts "TRAVERSING DOWN"
111
104
  self.extend_nodes(a, closed_rules: closed_rules)
112
105
  end
113
106
  # add node as closed rule if not subsumed by another rule already added
114
107
  rule_frequency = a.content.size
115
108
  rule = a.name
116
109
  if closed_rules[rule_frequency].nil?
117
- logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
110
+ # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
118
111
  closed_rules[rule_frequency] = [rule]
119
112
  else
120
113
  if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
121
- logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
114
+ # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
122
115
  closed_rules[rule_frequency] << rule
123
116
  else
124
- logger.debug "RULE SUBSUMED, NOT ADDING: #{rule}:#{rule_frequency}"
117
+ # puts "RULE SUBSUMED, NOT ADDING: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
125
118
  end
126
119
  end
127
120
  current_node = current_node.next_sibling
@@ -28,6 +28,35 @@ module Evoc
28
28
  CSV {|row| row << results}
29
29
  end
30
30
 
31
+ def evolution
32
+ CSV {|row| row << %w(index relevant_index overlap)}
33
+ self.tx_store.each do |tx|
34
+ changed_in = self.tx_store.transactions_of_list(tx.items, strict: false, identifier: :index)
35
+ previous_txes = changed_in.select {|i| i <= tx.index}
36
+ previous_txes.each do |prev_index|
37
+ prev_tx = self.tx_store.get_tx(id: prev_index,id_type: :index)
38
+ overlap = ((prev_tx.items & tx.items).size/tx.size.to_f).round(2)
39
+ CSV {|row| row << [tx.index,prev_index,overlap]}
40
+ end
41
+ end
42
+ end
43
+
44
+ def commits
45
+ unique_items = Set.new
46
+ changes_so_far = 0
47
+ self.tx_store.each do |tx|
48
+ data = Hash.new
49
+ tx.items.each {|item| unique_items << item}
50
+ changes_so_far = changes_so_far += tx.items.size
51
+ data['sha'] = tx.id
52
+ data['index'] = tx.index
53
+ data['num_changes'] = tx.items.size
54
+ data['items_touched_so_far'] = unique_items.size
55
+ data['moving_average'] = (changes_so_far/(tx.index+1)).to_f.round(2)
56
+ STDOUT.puts data.to_json
57
+ end
58
+ end
59
+
31
60
  def avg_method_changes_per_parsable_file
32
61
  parsable_files_changed = 0
33
62
  method_changes = 0
@@ -1,37 +1,49 @@
1
1
  class Array
2
- def subset?(other)
3
- self & other == self
4
- end
5
-
6
- def include_any?(other)
7
- (self & other).size > 0
8
- end
9
-
10
- ##
11
- # returns the union of an array of arraya
12
- def array_union
13
- if union = self.inject(:|)
14
- return union
15
- else
16
- return []
17
- end
18
- end
19
-
20
- ##
21
- # returns the intersection of a list of lists
22
- def array_intersection
23
- if intersection = self.inject(:&)
24
- return intersection
25
- else
26
- return []
27
- end
28
- end
29
-
30
- ##
31
- # returns the list of items in self that was not in other
32
- def array_difference(other)
33
- self.map {|a| a - other}.array_union
34
- end
2
+
3
+ def mean
4
+ self.inject(0) { |sum, x| sum += x } / self.size.to_f
5
+ end
6
+
7
+ def median(already_sorted=false)
8
+ return nil if self.empty?
9
+ array = (already_sorted ? self : self.sort)
10
+ m_pos = array.size / 2
11
+ return array.size % 2 == 1 ? array[m_pos] : array[m_pos-1..m_pos].mean
12
+ end
13
+
14
+ def subset?(other)
15
+ self & other == self
16
+ end
17
+
18
+ def include_any?(other)
19
+ (self & other).size > 0
20
+ end
21
+
22
+ ##
23
+ # returns the union of an array of arrays
24
+ def array_union
25
+ if union = self.inject(:|)
26
+ return union
27
+ else
28
+ return []
29
+ end
30
+ end
31
+
32
+ ##
33
+ # returns the intersection of a list of lists
34
+ def array_intersection
35
+ if intersection = self.inject(:&)
36
+ return intersection
37
+ else
38
+ return []
39
+ end
40
+ end
41
+
42
+ ##
43
+ # returns the list of items in self that was not in other
44
+ def array_difference(other)
45
+ self.map {|a| a - other}.array_union
46
+ end
35
47
 
36
48
  def self.powerset(set)
37
49
  return [set] if set.empty?
@@ -11,6 +11,14 @@ module Evoc
11
11
  end
12
12
  end
13
13
 
14
+ def self.mean_support(rules:)
15
+ if rules.empty? then return nil end
16
+ return (rules.inject(0) {|sum,r| sum + r.m_support.value}/rules.size).to_f
17
+ end
18
+
19
+ def self.mean_support10(rules:)
20
+ return self.mean_support(rules: Evoc::RuleStore.sort_on(rules: rules,measures: ['m_support']).take(10).flatten.take(10))
21
+ end
14
22
 
15
23
  def self.mean_confidence(rules:)
16
24
  if rules.empty? then return nil end
@@ -17,6 +17,8 @@ module Evoc
17
17
  end
18
18
 
19
19
  def sample_transactions
20
+ # initialize a random number generator with a fixed seed
21
+ rand = Random.new(self.opts[:seed])
20
22
  # by default we can sample from the whole history
21
23
  sampling_history = Evoc::HistoryStore.base_history
22
24
  STDERR.puts "Sampling transactions from a pool of #{sampling_history.size}.."
@@ -74,7 +76,7 @@ module Evoc
74
76
  tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
75
77
  tx_sizes_to_sample_from.each do |group_size|
76
78
  if group_size == '*'
77
- sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size])
79
+ sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size], random: rand)
78
80
  sample << sampled_ids
79
81
  STDERR.puts "Sampled #{sampled_ids.size} txes"
80
82
  # remove sampled txes from sampling_history
@@ -87,7 +89,7 @@ module Evoc
87
89
  if group.size < self.opts[:sample_size]
88
90
  logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
89
91
  end
90
- sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
92
+ sampled_ids = group.sample(self.opts[:sample_size], random: rand).map(&:id)
91
93
  sample << sampled_ids
92
94
  STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
93
95
  else
@@ -109,6 +111,8 @@ module Evoc
109
111
  # tx_id, query
110
112
  #
111
113
  def generate_queries
114
+ # initialize a random number generator with a fixed seed
115
+ rand = Random.new(self.opts[:seed])
112
116
  ##
113
117
  # write dict
114
118
  ##
@@ -154,15 +158,15 @@ module Evoc
154
158
  specified_sizes.uniq!
155
159
 
156
160
  random_sizes = []
157
- if self.opts[:random_select] then random_sizes << Random.new.rand(1..(tx_size-1)) end
161
+ if self.opts[:random_select] then random_sizes << Random.new.rand(self.opts[:minimum_query_size]..(tx_size-1)) end
158
162
 
159
163
  sampled_queries = []
160
164
  # only specified sizes
161
165
  if random_sizes.empty? & !specified_sizes.empty?
162
- sampled_queries = specified_sizes.map {|s| items.sample(s)}
166
+ sampled_queries = specified_sizes.map {|s| items.sample(s, random: rand)}
163
167
  # only random sizes
164
168
  elsif !random_sizes.empty? & specified_sizes.empty?
165
- sampled_queries = random_sizes.map {|s| items.sample(s)}
169
+ sampled_queries = random_sizes.map {|s| items.sample(s, random: rand)}
166
170
  # random + specified = randomly sample in range defined by specified
167
171
  # ex:
168
172
  # specified = [1,3,10,20]
@@ -172,9 +176,9 @@ module Evoc
172
176
  # 2. randomly select X in specified = Y
173
177
  # 3. randomly select Y in tx
174
178
  elsif !random_sizes.empty? & !specified_sizes.empty?
175
- specified_sizes.select! {|s| (s < tx_size) & (s > 0)} #1.
176
- if randomly_sampled_size = specified_sizes.sample #2.
177
- sampled_queries = [items.sample(randomly_sampled_size)] #3.
179
+ specified_sizes.select! {|s| (s < tx_size) & (s >= self.opts[:minimum_query_size])} #1.
180
+ if randomly_sampled_size = specified_sizes.sample(random: rand) #2.
181
+ sampled_queries = [items.sample(randomly_sampled_size, random: rand)] #3.
178
182
  end
179
183
  end
180
184
 
@@ -189,6 +193,9 @@ module Evoc
189
193
  logger.debug "The size of the sampled query was equal to the size of the transaction, skipping.. Tx ID: #{tx_id}. Query size: #{query.size}"
190
194
  next
191
195
  end
196
+ if query.size < self.opts[:minimum_query_size]
197
+ next
198
+ end
192
199
  CSV {|row| row << [tx_id,query.join(',')]}
193
200
  end
194
201
  else
@@ -231,8 +238,6 @@ module Evoc
231
238
  factor_max_size = self.opts[:max_size].nil? ? nil : self.opts[:max_size].map {|s| [ 'max_size',s ]}
232
239
  # Factor: Model age aka number of commits between query and last tx in history
233
240
  factor_model_age = self.opts[:model_age].nil? ? nil : self.opts[:model_age].map {|s| [ 'model_age',s ]}
234
- # Factor: Algorithm
235
- factor_algorithms = self.opts[:algorithms].nil? ? nil : self.opts[:algorithms].map {|a| ['algorithm',a]}
236
241
  # Factor: Measures
237
242
  factor_measures = self.opts[:measures].map {|c| ['measures',c]}
238
243
  # Factor: Aggregator
@@ -253,7 +258,8 @@ module Evoc
253
258
  num_lines = File.read(self.opts[:queries]).each_line.count-1
254
259
  current_line = 1
255
260
 
256
- factors = [factor_model_size,factor_max_size,factor_model_age,factor_algorithms,factor_measures,factor_permutation,factor_aggregators].compact
261
+ # compact removes nil values (not used factors)
262
+ factors = [factor_model_size,factor_max_size,factor_model_age,factor_measures,factor_permutation,factor_aggregators].compact
257
263
  num_of_scenarios = factors.inject(1) {|product,f| product * f.size}
258
264
  invalid_configuration = 0
259
265
  last_error = 'no errors'
@@ -280,38 +286,53 @@ module Evoc
280
286
  end
281
287
 
282
288
  current_scenario = 1
283
- # - compact removes nil values (not used factors)
284
289
  # - the splat operator '*' turns the array into parameters for #product
285
290
  # - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
286
291
  factors.first.product(*factors[1..-1]).each do |scenario|
287
- # Print progress to stderr
288
- STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
289
- if invalid_configuration > 0
290
- STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
291
- else
292
- STDERR.print " \r"
293
- end
294
-
295
292
  params = query_hash.merge(scenario.to_h)
296
293
  params[:case_id] = self.opts[:case_id]
297
294
  params[:granularity] = self.opts[:granularity]
298
295
  # initialize scenario
299
296
  s = Evoc::Scenario.new(params)
300
- begin
301
- Evoc::RecommendationCache.get_recommendation(algorithm: s.algorithm,
302
- query: s.query,
303
- model_start: s.model_start,
304
- model_end: s.model_end,
305
- max_size: s.max_size,
306
- aggregator: s.aggregator,
307
- measures: s.measures)
308
- Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators], topk: self.opts[:topk], unique_consequents: self.opts[:unique_consequents], expected_outcome: s.expected_outcome,measure_combination: s.measures)
309
- result = Evoc::RecommendationCache.to_h(measures: s.measures)
310
- # merge scenario params with result hash and dump as json
311
- $stdout.puts s.to_h.merge({topk: self.opts[:topk],date: tx.date}).merge(result).to_json
312
- rescue ArgumentError => e
313
- invalid_configuration += 1
314
- last_error = e.message
297
+ scenario_stats = {}
298
+ if self.opts[:stats]
299
+ scenario_stats = s.stats
300
+ end
301
+ # Factor: Algorithm
302
+ self.opts[:algorithms].each do |algorithm|
303
+ s.algorithm = algorithm
304
+ # Print progress to stderr
305
+ STDERR.print "(#{self.opts[:case_id]}) Executing #{algorithm} on scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines}"
306
+ if invalid_configuration > 0
307
+ STDERR.print " (scenarios skipped: #{invalid_configuration},last reason: #{last_error[0..20]}...) \r"
308
+ else
309
+ STDERR.print " \r"
310
+ end
311
+
312
+ begin
313
+ Evoc::RecommendationCache.get_recommendation(algorithm: algorithm,
314
+ query: s.query,
315
+ model_start: s.model_start,
316
+ model_end: s.model_end,
317
+ max_size: s.max_size,
318
+ aggregator: s.aggregator,
319
+ measures: s.measures)
320
+ Evoc::RecommendationCache.evaluate_last(evaluators: self.opts[:evaluators],
321
+ topk: self.opts[:topk],
322
+ unique_consequents: self.opts[:unique_consequents],
323
+ expected_outcome: s.expected_outcome,
324
+ measure_combination: s.measures)
325
+
326
+ # build json line by merging hashes
327
+ $stdout.puts s.to_h
328
+ .merge(scenario_stats)
329
+ .merge({topk: self.opts[:topk], date: tx.date})
330
+ .merge(Evoc::RecommendationCache.to_h(measures: s.measures))
331
+ .to_json
332
+ rescue ArgumentError => e
333
+ invalid_configuration += 1
334
+ last_error = e.message
335
+ end
315
336
  end
316
337
  current_scenario += 1
317
338
  end
@@ -12,14 +12,22 @@ module Evoc
12
12
  attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :filtered_model_size, :evaluation
13
13
  end
14
14
 
15
- def self.recommendation_cached?(algorithm:, query:, model_start:, model_end:, max_size: nil)
15
+ def self.recommendation_cached?(algorithm:,
16
+ query:,
17
+ model_start:,
18
+ model_end:,
19
+ max_size: nil)
16
20
  return self.tag == [algorithm,query,model_start,model_end,max_size].hash
17
21
  end
18
22
 
19
23
 
20
- ##
21
- # @param scenario <Evoc::Scenario> the scenario to cache a new recommendation for
22
- def self.get_recommendation(algorithm:, query:, model_start:, model_end:, max_size: nil, aggregator: nil, measures: [])
24
+ def self.get_recommendation(algorithm:,
25
+ query:,
26
+ model_start:,
27
+ model_end:,
28
+ max_size: nil,
29
+ aggregator: nil,
30
+ measures: [])
23
31
  # check if a new base recommendation needs to be generated
24
32
  tag = [algorithm,query,model_start,model_end,max_size].hash
25
33
  if self.tag != tag
@@ -16,6 +16,7 @@ module Evoc
16
16
  :model_size,
17
17
  :model_age,
18
18
  :max_size,
19
+ :stats,
19
20
  :opts
20
21
 
21
22
  def initialize(opts = Hash.new)
@@ -23,7 +24,7 @@ module Evoc
23
24
  self.opts = opts
24
25
  self.scenario_id = opts.hash
25
26
 
26
- # model_size depends on model_age, so set model_age first
27
+ self.tx_id = opts[:tx_id]
27
28
  self.model_age = opts[:model_age]
28
29
  opts.each do |attribute,value|
29
30
  self.send("#{attribute}=", value)
@@ -55,11 +56,17 @@ module Evoc
55
56
 
56
57
 
57
58
  def to_h
58
- fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
59
+ fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures stats)
59
60
  hash = Hash.new
60
61
  fields.each do |key|
61
62
  value = self.method(key).call
62
- hash[key] = value.is_a?(Array) ? value.join(',') : value
63
+ if value.is_a?(Array)
64
+ hash[key] = value.join(',')
65
+ elsif value.is_a?(Hash)
66
+ hash.merge!(value)
67
+ else
68
+ hash[key] = value
69
+ end
63
70
  end
64
71
  return hash
65
72
  end
@@ -228,5 +235,65 @@ module Evoc
228
235
  def tx_size
229
236
  self.tx.size
230
237
  end
238
+
239
+ def stats
240
+ time_start = Time.now
241
+ history = Evoc::HistoryStore.get_history(self.model_start,self.model_end,self.max_size)
242
+ relevant_transactions = Set.new
243
+ relevant_items = Set.new
244
+ relevant_ages = []
245
+ avg_age_of_relevant_transactions = 0
246
+ avg_size_of_relevant_transactions = 0
247
+ files_changed = Set.new
248
+ num_methods_changed = 0
249
+ num_new_items = 0
250
+ # @avg_size_of_relevant_transactions = 0
251
+ # mean_age_of_relevant
252
+ # median_age_of_relevant
253
+ # ratio_new_items
254
+ self.query.each do |item|
255
+ if change = history.int_2_name[item]
256
+ change = change.split(':')
257
+ files_changed << change[0]
258
+ if change.size > 1
259
+ num_methods_changed = num_methods_changed + 1
260
+ end
261
+ indexes_of_previous_changes = history.transactions_of(item, identifier: :index)
262
+ if new_item = (indexes_of_previous_changes.size == 0)
263
+ num_new_items = num_new_items + 1
264
+ else
265
+ indexes_of_previous_changes.each do |tx_index|
266
+ relevant_so_far = relevant_transactions.size
267
+ relevant_transactions << tx_index
268
+ new_relevant = (relevant_transactions.size > relevant_so_far)
269
+ if new_relevant
270
+ age = (self.tx_index - tx_index)
271
+ relevant_ages << age
272
+ tx = history.get_tx(id: tx_index,id_type: :index)
273
+ avg_size_of_relevant_transactions = avg_size_of_relevant_transactions + tx.size
274
+ avg_age_of_relevant_transactions = avg_age_of_relevant_transactions + age
275
+ end
276
+ end
277
+ end
278
+ else
279
+ num_new_items = num_new_items + 1
280
+ end
281
+ end
282
+
283
+ num_relevant_transactions = relevant_transactions.size
284
+
285
+ time_end = Time.now
286
+ time_generate_stats = TimeDifference.between(time_start,time_end).in_seconds.round(8)
287
+ {time_generate_stats: time_generate_stats,
288
+ num_files_changed: files_changed.size,
289
+ num_methods_changed: num_methods_changed,
290
+ num_new_items: num_new_items,
291
+ num_relevant_transactions: num_relevant_transactions,
292
+ median_age_of_relevant_transactions: relevant_ages.median,
293
+ avg_age_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_age_of_relevant_transactions/num_relevant_transactions).to_f,
294
+ avg_size_of_relevant_transactions: num_relevant_transactions == 0 ? nil : (avg_size_of_relevant_transactions/num_relevant_transactions).to_f}
295
+ end
296
+
297
+
231
298
  end
232
299
  end
@@ -1,3 +1,3 @@
1
1
  module Evoc
2
- VERSION = "3.9.1"
2
+ VERSION = "3.10.0"
3
3
  end
@@ -6,6 +6,17 @@ module EvocCLI
6
6
  class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
7
7
  class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
8
8
 
9
+ desc "evolution","Outputs where the items of all transactions previously changed."
10
+ def evolution
11
+ a = Evoc::Analyze.new(options)
12
+ a.evolution
13
+ end
14
+
15
+ desc "commits","Outputs the items touched so far and the moving average of items per transactions for every transaction in the history"
16
+ def commits
17
+ a = Evoc::Analyze.new(options)
18
+ a.commits
19
+ end
9
20
 
10
21
  method_option :number, aliases: '-n', type: :numeric, default: 1000, desc: "The number of rules to calculate measures for"
11
22
  desc "measure_values","Empirically investigate the range of interestingness measures"
@@ -19,6 +19,7 @@ module EvocCLI
19
19
 
20
20
  ##
21
21
  # sample_transactions
22
+ method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
22
23
  method_option :sample_groups, aliases: "-g", type: :array, required: true, desc: "What tx size groups to sample from"
23
24
  method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
24
25
  method_option :recent, type: :numeric, desc: "Filter to the X most recent transactions"
@@ -38,6 +39,7 @@ module EvocCLI
38
39
  ##
39
40
  # generate_queries
40
41
  #
42
+ method_option :seed, type: :numeric, default: 42, desc: "Seed to use when initializing random number generator"
41
43
  method_option :transaction_ids_path, :aliases => '-i', :type => :string, :required => true, :desc => "Path to file with \\n separated list of transaction ids"
42
44
  method_option :random_select, type: :boolean, default: false, desc: "Randomly select a query from each given transaction"
43
45
  method_option :select, aliases: '-s', type: :array, default: [],
@@ -46,6 +48,7 @@ module EvocCLI
46
48
  desc: "Reverse version of --select (select \"all but\" X)"
47
49
  method_option :percentage, aliases: '-e', type: :array,
48
50
  desc: "Percentage of items to select for each query"
51
+ method_option :minimum_query_size, type: :numeric, default: 1, desc: "Only sample queries of at least this size"
49
52
  method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
50
53
  method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
51
54
  method_option :write_dict, type: :string, desc: "Write an item dictionary to the provided file"
@@ -75,6 +78,7 @@ module EvocCLI
75
78
  method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
76
79
  method_option :unique_consequents, type: :boolean, default: false, desc: "Filter our duplicate consequents when evaluating, keeping the strongest. Only has effect when evaluating non-aggregated recommendations."
77
80
  method_option :topk, type: :numeric, required: false, desc: "Evaluate over the top K items, these are selected AFTER any consequent filter"
81
+ method_option :stats, type: :boolean, required: false, desc: "Generate extra stats describing each scenario"
78
82
  desc "execute_scenarios [options]",""
79
83
  def execute_scenarios
80
84
  if !options[:permutation].nil?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: evoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.9.1
4
+ version: 3.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Rolfsnes
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-09 00:00:00.000000000 Z
11
+ date: 2017-03-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler