evoc 3.8.1 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 085ad4c083e2ac19df50dbecd0a6279d0b5e010c
4
- data.tar.gz: 57f950592bad1814599e2394c028ab8c47df8e50
3
+ metadata.gz: 88a68050e4c0549ae496b8aa53cfdf00c452a596
4
+ data.tar.gz: 9f51e810f2402c4238f0fd0df127de5df813ea3b
5
5
  SHA512:
6
- metadata.gz: d8e4b6e718744121e6c50c3c49111c1f679c47f57376d9cca287697e2e04efa22bad6a55f6c29c7beba21e6a10b9f15134f1f539de9249d60a84e38846595a8e
7
- data.tar.gz: 05f581eb1a4c8838c0258e920aca701db8400648b7dd8c2ab4a3184327732ad4e67b87bc46c2b860a055f8cece51f81509fea885d2c340d28abe0ece26ba3234
6
+ metadata.gz: 2d8cb5d438e8299ffb5ad7a8eabf0cd99c3a3dec8951a32e1e7eea476b5b471a7fa02afd04320d56b5d539c20315bfaaf1422436b545dbbbcc32125ee70d0d2a
7
+ data.tar.gz: bbaee8dcd9dee18adeb8bb070e7aaf33b19225d237026546d6924684518006f400e527d3758f3f9a8a23cb9bc929a6d56a3d371ee9ac46ec2125f3f86cc61419
@@ -1,51 +1,59 @@
1
1
  module Evoc
2
2
  class ClosedRules
3
+ extend Logging
4
+
3
5
  def self.closed_rules(tx_store:,query:)
4
6
  # @@store = tx_store
5
7
  # create initial trees, one tree per consequent
6
8
  tree = self.initialize_tree(tx_store,query)
7
- # puts "INIT TREE:"
8
- # tree.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}"})
9
+ logger.debug "INIT TREE:"
10
+ tree.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}"})
9
11
  closed_rules = Evoc::RuleStore.new(query: query)
10
- tree.children.each do |consequent|
11
- self.extend_nodes(consequent).each do |frequency, closed_sets|
12
+ # tree.children.each do |consequent|
13
+ self.extend_nodes(tree).each do |frequency, closed_sets|
12
14
  closed_sets.each do |closed_set|
13
15
  antecedent = closed_set - consequent.name
14
16
  closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
15
17
  end
16
18
  end
17
- end
19
+ # end
18
20
  return closed_rules
19
21
  end
20
22
 
21
23
  private
22
24
  def self.initialize_tree(tx_store, query)
25
+ rules = Evoc::Algorithm.co_change(tx_store: tx_store, query: query)
23
26
  tree = Tree::TreeNode.new([])
24
- # find all items that changed with something in the query
25
- query_changed_in = tx_store.transactions_of_list(query)
26
- # store all items from the query that have changed with each consequent
27
- query_changed_in.each do |tx_id|
28
- tx = tx_store.get_tx(id:tx_id,id_type: :index)
29
- antecedent = (query & tx.items)
30
- consequents = (tx.items - antecedent)
31
- if consequents.size != 0
32
- consequents.each do |consequent|
33
- consequent_key = [consequent.to_s]
34
- if tree[consequent_key].nil?
35
- # initialize candidates
36
- tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
37
- end
38
- txes_consequent = tree[consequent_key].content
39
- antecedent.each do |item|
40
- union = [item.to_s,consequent.to_s]
41
- if tree[consequent_key][union].nil?
42
- txes_union = tx_store.transactions_of(item) & txes_consequent
43
- tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
44
- end
45
- end
46
- end
47
- end
27
+ rules.each do |rule|
28
+ txes_union = tx_store.transactions_of(rule.lhs.first) & tx_store.transactions_of(rule.rhs.first)
29
+ union = [rule.lhs.first.to_s,rule.rhs.first.to_s]
30
+ tree << Tree::TreeNode.new(union,txes_union)
48
31
  end
32
+ # # find all items that changed with something in the query
33
+ # query_changed_in = tx_store.transactions_of_list(query)
34
+ # # store all items from the query that have changed with each consequent
35
+ # query_changed_in.each do |tx_id|
36
+ # tx = tx_store.get_tx(id:tx_id,id_type: :index)
37
+ # antecedent = (query & tx.items)
38
+ # consequents = (tx.items - antecedent)
39
+ # if consequents.size != 0
40
+ # consequents.each do |consequent|
41
+ # consequent_key = [consequent.to_s]
42
+ # if tree[consequent_key].nil?
43
+ # # initialize candidates
44
+ # tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
45
+ # end
46
+ # txes_consequent = tree[consequent_key].content
47
+ # antecedent.each do |item|
48
+ # union = [item.to_s,consequent.to_s]
49
+ # if tree[consequent_key][union].nil?
50
+ # txes_union = tx_store.transactions_of(item) & txes_consequent
51
+ # tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
52
+ # end
53
+ # end
54
+ # end
55
+ # end
56
+ # end
49
57
  return(tree)
50
58
  end
51
59
 
@@ -55,7 +63,7 @@ module Evoc
55
63
  a = current_node
56
64
  b = a.next_sibling
57
65
  while(!b.nil?) do
58
- # print "Checking #{@@store.ints2names(a.name.map(&:to_i))}:{#{a.content}} against #{@@store.ints2names(b.name.map(&:to_i))}:{#{b.content}}"
66
+ logger.debug "Checking #{a.name}:{#{a.content}} against #{b.name}:{#{b.content}}"
59
67
  ab = a.name | b.name
60
68
  a_txes = a.content
61
69
  b_txes = b.content
@@ -67,54 +75,53 @@ module Evoc
67
75
  if ab_txes.size > 0
68
76
  case self.compare(a_txes,b_txes)
69
77
  when 'EQUAL'
70
- # puts " EQUAL"
71
- # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
72
- # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
78
+ logger.debug " EQUAL"
79
+ logger.debug " removing #{b.name}"
80
+ logger.debug " renaming #{a.name} to #{ab}"
73
81
  temp = b.previous_sibling
74
82
  root.remove!(b)
75
83
  b = temp
76
84
  a.each {|n| n.rename(ab | n.name)}
77
85
  when 'A_IN_B'
78
- # puts " A in B"
79
- # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
86
+ logger.debug " A in B"
87
+ logger.debug " renaming #{a.name} to #{ab}"
80
88
  a.each {|n| n.rename(ab | n.name)}
81
89
  when 'B_IN_A'
82
- # puts " B in A"
83
- # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
84
- # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
90
+ logger.debug " B in A"
91
+ logger.debug " removing #{b.name}"
92
+ logger.debug " adding child #{ab} to #{a.name}"
85
93
  temp = b.previous_sibling
86
94
  root.remove!(b)
87
95
  b = temp
88
96
  a << Tree::TreeNode.new(ab,ab_txes)
89
97
  when 'NOT_EQUAL'
90
- # puts " NOT EQUAL"
91
- # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
98
+ logger.debug " NOT EQUAL"
99
+ logger.debug " adding child #{ab} to #{a.name}"
92
100
  a << Tree::TreeNode.new(ab,ab_txes)
93
101
  end
94
102
  end
95
- # puts "NEW TREE:"
96
- # root.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}:#{node.content.size}"})
103
+ logger.debug "NEW TREE:"
104
+ root.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}:#{node.content.size}"})
97
105
  b = b.next_sibling
98
- # puts "A siblings #{a.right_siblings.map(&:name).map {|n| @@store.ints2names(n.map(&:to_i))}}"
99
- # puts "A next sibling #{@@store.ints2names(a.next_sibling.name.map(&:to_i))}}"
100
- # puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
106
+ logger.debug "A next sibling #{b}}"
107
+ logger.debug "A:#{a.name}, B:#{b.nil? ? nil : b.name}"
101
108
  end # siblings.each
102
109
  if !a.children.empty?
103
- # puts "TRAVERSING DOWN"
110
+ logger.debug "TRAVERSING DOWN"
104
111
  self.extend_nodes(a, closed_rules: closed_rules)
105
112
  end
106
113
  # add node as closed rule if not subsumed by another rule already added
107
114
  rule_frequency = a.content.size
108
115
  rule = a.name
109
116
  if closed_rules[rule_frequency].nil?
110
- # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
117
+ logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
111
118
  closed_rules[rule_frequency] = [rule]
112
119
  else
113
120
  if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
114
- # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
121
+ logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
115
122
  closed_rules[rule_frequency] << rule
116
123
  else
117
- # puts "RULE SUBSUMED, NOT ADDING: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
124
+ logger.debug "RULE SUBSUMED, NOT ADDING: #{rule}:#{rule_frequency}"
118
125
  end
119
126
  end
120
127
  current_node = current_node.next_sibling
@@ -17,59 +17,78 @@ module Evoc
17
17
  end
18
18
 
19
19
  def sample_transactions
20
- STDERR.puts "Sampling transactions.."
21
20
  # by default we can sample from the whole history
22
21
  sampling_history = Evoc::HistoryStore.base_history
22
+ STDERR.puts "Sampling transactions from a pool of #{sampling_history.size}.."
23
23
  sample = []
24
+
25
+ #################################################################################
26
+ # performing filtering steps on min/max commits size and minimum previous history
27
+ #################################################################################
28
+
29
+ if !self.opts[:recent].nil?
30
+ size = sampling_history.size
31
+ sampling_history = sampling_history[[0,size-self.opts[:recent]].max..-1]
32
+ STDERR.puts " Filtering to the #{self.opts[:recent]} most recent transactions (new pool size: #{sampling_history.size})"
33
+ end
24
34
  # filter out transactions larger than X
35
+ if !self.opts[:minimum_commit_size].nil?
36
+ sampling_history = sampling_history.select {|tx| tx.size >= self.opts[:minimum_commit_size]}
37
+ STDERR.puts " Filtering to txes larger than or equal to #{self.opts[:minimum_commit_size]} (new pool size: #{sampling_history.size})"
38
+ end
25
39
  if !self.opts[:maximum_commit_size].nil?
26
- STDERR.puts "Only sampling txes smaller than #{self.opts[:maximum_commit_size]}"
27
- sampling_history = sampling_history.clone_with_subset(0,sampling_history.size-1,self.opts[:maximum_commit_size])
40
+ sampling_history = sampling_history.select {|tx| tx.size <= self.opts[:maximum_commit_size]}
41
+ STDERR.puts " Filtering to txes smaller than or equal to #{self.opts[:maximum_commit_size]} (new pool size: #{sampling_history.size})"
28
42
  end
29
43
  # only sample transactions that have at least 'minimum_history' previous history
30
44
  if !self.opts[:minimum_history].nil?
31
- STDERR.puts "Only sampling txes with at least #{self.opts[:minimum_history]} previous txes (history)"
32
- if self.opts[:minimum_history] >= sampling_history.size-1
33
- raise ArgumentError, "The history you provided (#{self.opts[:transactions]}), only contains #{sampling_history.size}, not enough to sample with a minimum history set to #{self.opts[:minimum_history]}. Perhaps also #{self.opts[:maximum_commit_size]} must be increased."
45
+ sampling_history = sampling_history.select {|tx| tx.index >= self.opts[:minimum_history]}
46
+ STDERR.puts " Filtering to txes with at least #{self.opts[:minimum_history]} previous txes (new pool size: #{sampling_history.size})"
47
+ end
48
+ filtering_switches = [:recent,:minimum_commit_size,:maximum_commit_size,:minimum_history]
49
+ if filtering_switches.any? {|s| !self.opts[s].nil?}
50
+ if sampling_history.size == 0
51
+ STDERR.puts "WARNING: All transactions were filtered out, unable to sample"
52
+ return []
34
53
  end
35
- sampling_history = sampling_history.clone_with_subset(self.opts[:minimum_history],sampling_history.size-1)
36
54
  end
37
- if self.opts[:recent]
38
- STDERR.puts "Taking the #{self.opts[:sample_size]} most recent transactions, this overrides any other sampling params apart from maximum_commit_size"
39
- txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}
40
- sample = txes_larger_than_one.sort_by {|tx| -tx.index}.take(self.opts[:sample_size]).map(&:id)
41
- else
42
- # group the txes by size
43
- groups = sampling_history.group_by {|tx| tx.size}
44
- # sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
45
- # i.e., random sampling is done first, then the sampled txes are removed from the sampling
46
- tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
47
- tx_sizes_to_sample_from.each do |group_size|
48
- if group_size == '*'
49
- # TODO: > 2 should be generalized to > X
50
- txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}.map(&:id)
51
- sampled_ids = txes_larger_than_one.sample(self.opts[:sample_size])
52
- sample << sampled_ids
53
- STDERR.puts "Sampled #{sampled_ids.size} txes from the whole history"
54
- # remove sampled txes from sampling_history
55
- filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
56
- sampling_history.clear
57
- filtered_hist.each {|tx| sampling_history << tx}
58
- elsif group_size.to_i
59
- # check if there were any txes of this size
60
- if group = groups[group_size.to_i]
61
- if group.size < self.opts[:sample_size]
62
- logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
63
- end
64
- sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
65
- sample << sampled_ids
66
- STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
67
- else
68
- logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
55
+
56
+ if self.opts[:sample_size] > sampling_history.size
57
+ STDERR.puts "WARNING: The sample size is larger than the available transactions"
58
+ end
59
+
60
+ ######################
61
+ # performing sampling
62
+ ######################
63
+
64
+ # group the txes by size
65
+ groups = sampling_history.group_by {|tx| tx.size}
66
+ # sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
67
+ # i.e., random sampling is done first, then the sampled txes are removed from the sampling
68
+ tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
69
+ tx_sizes_to_sample_from.each do |group_size|
70
+ if group_size == '*'
71
+ sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size])
72
+ sample << sampled_ids
73
+ STDERR.puts "Sampled #{sampled_ids.size} txes"
74
+ # remove sampled txes from sampling_history
75
+ filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
76
+ sampling_history.clear
77
+ filtered_hist.each {|tx| sampling_history << tx}
78
+ elsif group_size.to_i
79
+ # check if there were any txes of this size
80
+ if group = groups[group_size.to_i]
81
+ if group.size < self.opts[:sample_size]
82
+ logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
69
83
  end
84
+ sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
85
+ sample << sampled_ids
86
+ STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
70
87
  else
71
- raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
88
+ logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
72
89
  end
90
+ else
91
+ raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
73
92
  end
74
93
  end
75
94
  sample.flatten.uniq
@@ -135,17 +154,17 @@ module Evoc
135
154
  # only specified sizes
136
155
  if random_sizes.empty? & !specified_sizes.empty?
137
156
  sampled_queries = specified_sizes.map {|s| items.sample(s)}
138
- # only random sizes
157
+ # only random sizes
139
158
  elsif !random_sizes.empty? & specified_sizes.empty?
140
159
  sampled_queries = random_sizes.map {|s| items.sample(s)}
141
- # random + specified = randomly sample in range defined by specified
142
- # ex:
143
- # specified = [1,3,10,20]
144
- # tx size = 4
145
- #
146
- # 1. remove X in specified that are larger than or equal to 4
147
- # 2. randomly select X in specified = Y
148
- # 3. randomly select Y in tx
160
+ # random + specified = randomly sample in range defined by specified
161
+ # ex:
162
+ # specified = [1,3,10,20]
163
+ # tx size = 4
164
+ #
165
+ # 1. remove X in specified that are larger than or equal to 4
166
+ # 2. randomly select X in specified = Y
167
+ # 3. randomly select Y in tx
149
168
  elsif !random_sizes.empty? & !specified_sizes.empty?
150
169
  specified_sizes.select! {|s| (s < tx_size) & (s > 0)} #1.
151
170
  if randomly_sampled_size = specified_sizes.sample #2.
@@ -231,6 +250,7 @@ module Evoc
231
250
  factors = [factor_model_size,factor_max_size,factor_model_age,factor_algorithms,factor_measures,factor_permutation,factor_aggregators].compact
232
251
  num_of_scenarios = factors.inject(1) {|product,f| product * f.size}
233
252
  invalid_configuration = 0
253
+ last_error = 'no errors'
234
254
  CSV.foreach(self.opts[:queries], headers: true) do |query|
235
255
  # abort if the failsafe file is present
236
256
  if !self.opts[:fail_safe].nil?
@@ -254,7 +274,6 @@ module Evoc
254
274
  end
255
275
 
256
276
  current_scenario = 1
257
- last_error = 'no errors'
258
277
  # - compact removes nil values (not used factors)
259
278
  # - the splat operator '*' turns the array into parameters for #product
260
279
  # - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
data/lib/evoc/tx_store.rb CHANGED
@@ -95,8 +95,8 @@ module Evoc
95
95
  @txes.last
96
96
  end
97
97
 
98
- def [] index
99
- @txes[index]
98
+ def [] *indexes
99
+ @txes[*indexes]
100
100
  end
101
101
 
102
102
  def to_s
data/lib/evoc/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Evoc
2
- VERSION = "3.8.1"
2
+ VERSION = "3.9.0"
3
3
  end
@@ -23,7 +23,8 @@ module EvocCLI
23
23
  method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
24
24
  method_option :minimum_history, :aliases => '-m', type: :numeric, desc: "Filter out transactions which has less previous history than this"
25
25
  method_option :maximum_commit_size, type: :numeric, desc: "Filter out transactions which are larger than this before sampling"
26
- method_option :recent, type: :boolean, desc: "If transactions should be the most recent"
26
+ method_option :minimum_commit_size, type: :numeric, default: 2, desc: "Filter out transactions which are smaller than this before sampling"
27
+ method_option :recent, type: :numeric, desc: "Sample in the X most recent transactions"
27
28
  method_option :after, :aliases => '-a', :desc => "Only include commits after this date"
28
29
  method_option :before, :aliases => '-b', :desc => "Only include commits before this date"
29
30
  desc "sample_transactions [OPTIONS]","Make a sample of transactions (from JSON format)"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: evoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.8.1
4
+ version: 3.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Rolfsnes
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-15 00:00:00.000000000 Z
11
+ date: 2017-03-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler