evoc 3.8.1 → 3.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 085ad4c083e2ac19df50dbecd0a6279d0b5e010c
4
- data.tar.gz: 57f950592bad1814599e2394c028ab8c47df8e50
3
+ metadata.gz: 88a68050e4c0549ae496b8aa53cfdf00c452a596
4
+ data.tar.gz: 9f51e810f2402c4238f0fd0df127de5df813ea3b
5
5
  SHA512:
6
- metadata.gz: d8e4b6e718744121e6c50c3c49111c1f679c47f57376d9cca287697e2e04efa22bad6a55f6c29c7beba21e6a10b9f15134f1f539de9249d60a84e38846595a8e
7
- data.tar.gz: 05f581eb1a4c8838c0258e920aca701db8400648b7dd8c2ab4a3184327732ad4e67b87bc46c2b860a055f8cece51f81509fea885d2c340d28abe0ece26ba3234
6
+ metadata.gz: 2d8cb5d438e8299ffb5ad7a8eabf0cd99c3a3dec8951a32e1e7eea476b5b471a7fa02afd04320d56b5d539c20315bfaaf1422436b545dbbbcc32125ee70d0d2a
7
+ data.tar.gz: bbaee8dcd9dee18adeb8bb070e7aaf33b19225d237026546d6924684518006f400e527d3758f3f9a8a23cb9bc929a6d56a3d371ee9ac46ec2125f3f86cc61419
@@ -1,51 +1,59 @@
1
1
  module Evoc
2
2
  class ClosedRules
3
+ extend Logging
4
+
3
5
  def self.closed_rules(tx_store:,query:)
4
6
  # @@store = tx_store
5
7
  # create initial trees, one tree per consequent
6
8
  tree = self.initialize_tree(tx_store,query)
7
- # puts "INIT TREE:"
8
- # tree.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}"})
9
+ logger.debug "INIT TREE:"
10
+ tree.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}"})
9
11
  closed_rules = Evoc::RuleStore.new(query: query)
10
- tree.children.each do |consequent|
11
- self.extend_nodes(consequent).each do |frequency, closed_sets|
12
+ # tree.children.each do |consequent|
13
+ self.extend_nodes(tree).each do |frequency, closed_sets|
12
14
  closed_sets.each do |closed_set|
13
15
  antecedent = closed_set - consequent.name
14
16
  closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
15
17
  end
16
18
  end
17
- end
19
+ # end
18
20
  return closed_rules
19
21
  end
20
22
 
21
23
  private
22
24
  def self.initialize_tree(tx_store, query)
25
+ rules = Evoc::Algorithm.co_change(tx_store: tx_store, query: query)
23
26
  tree = Tree::TreeNode.new([])
24
- # find all items that changed with something in the query
25
- query_changed_in = tx_store.transactions_of_list(query)
26
- # store all items from the query that have changed with each consequent
27
- query_changed_in.each do |tx_id|
28
- tx = tx_store.get_tx(id:tx_id,id_type: :index)
29
- antecedent = (query & tx.items)
30
- consequents = (tx.items - antecedent)
31
- if consequents.size != 0
32
- consequents.each do |consequent|
33
- consequent_key = [consequent.to_s]
34
- if tree[consequent_key].nil?
35
- # initialize candidates
36
- tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
37
- end
38
- txes_consequent = tree[consequent_key].content
39
- antecedent.each do |item|
40
- union = [item.to_s,consequent.to_s]
41
- if tree[consequent_key][union].nil?
42
- txes_union = tx_store.transactions_of(item) & txes_consequent
43
- tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
44
- end
45
- end
46
- end
47
- end
27
+ rules.each do |rule|
28
+ txes_union = tx_store.transactions_of(rule.lhs.first) & tx_store.transactions_of(rule.rhs.first)
29
+ union = [rule.lhs.first.to_s,rule.rhs.first.to_s]
30
+ tree << Tree::TreeNode.new(union,txes_union)
48
31
  end
32
+ # # find all items that changed with something in the query
33
+ # query_changed_in = tx_store.transactions_of_list(query)
34
+ # # store all items from the query that have changed with each consequent
35
+ # query_changed_in.each do |tx_id|
36
+ # tx = tx_store.get_tx(id:tx_id,id_type: :index)
37
+ # antecedent = (query & tx.items)
38
+ # consequents = (tx.items - antecedent)
39
+ # if consequents.size != 0
40
+ # consequents.each do |consequent|
41
+ # consequent_key = [consequent.to_s]
42
+ # if tree[consequent_key].nil?
43
+ # # initialize candidates
44
+ # tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
45
+ # end
46
+ # txes_consequent = tree[consequent_key].content
47
+ # antecedent.each do |item|
48
+ # union = [item.to_s,consequent.to_s]
49
+ # if tree[consequent_key][union].nil?
50
+ # txes_union = tx_store.transactions_of(item) & txes_consequent
51
+ # tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
52
+ # end
53
+ # end
54
+ # end
55
+ # end
56
+ # end
49
57
  return(tree)
50
58
  end
51
59
 
@@ -55,7 +63,7 @@ module Evoc
55
63
  a = current_node
56
64
  b = a.next_sibling
57
65
  while(!b.nil?) do
58
- # print "Checking #{@@store.ints2names(a.name.map(&:to_i))}:{#{a.content}} against #{@@store.ints2names(b.name.map(&:to_i))}:{#{b.content}}"
66
+ logger.debug "Checking #{a.name}:{#{a.content}} against #{b.name}:{#{b.content}}"
59
67
  ab = a.name | b.name
60
68
  a_txes = a.content
61
69
  b_txes = b.content
@@ -67,54 +75,53 @@ module Evoc
67
75
  if ab_txes.size > 0
68
76
  case self.compare(a_txes,b_txes)
69
77
  when 'EQUAL'
70
- # puts " EQUAL"
71
- # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
72
- # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
78
+ logger.debug " EQUAL"
79
+ logger.debug " removing #{b.name}"
80
+ logger.debug " renaming #{a.name} to #{ab}"
73
81
  temp = b.previous_sibling
74
82
  root.remove!(b)
75
83
  b = temp
76
84
  a.each {|n| n.rename(ab | n.name)}
77
85
  when 'A_IN_B'
78
- # puts " A in B"
79
- # puts " renaming #{@@store.ints2names(a.name.map(&:to_i))} to #{@@store.ints2names(ab.map(&:to_i))}"
86
+ logger.debug " A in B"
87
+ logger.debug " renaming #{a.name} to #{ab}"
80
88
  a.each {|n| n.rename(ab | n.name)}
81
89
  when 'B_IN_A'
82
- # puts " B in A"
83
- # puts " removing #{@@store.ints2names(b.name.map(&:to_i))}"
84
- # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
90
+ logger.debug " B in A"
91
+ logger.debug " removing #{b.name}"
92
+ logger.debug " adding child #{ab} to #{a.name}"
85
93
  temp = b.previous_sibling
86
94
  root.remove!(b)
87
95
  b = temp
88
96
  a << Tree::TreeNode.new(ab,ab_txes)
89
97
  when 'NOT_EQUAL'
90
- # puts " NOT EQUAL"
91
- # puts " adding child #{@@store.ints2names(ab.map(&:to_i))} to #{@@store.ints2names(a.name.map(&:to_i))}"
98
+ logger.debug " NOT EQUAL"
99
+ logger.debug " adding child #{ab} to #{a.name}"
92
100
  a << Tree::TreeNode.new(ab,ab_txes)
93
101
  end
94
102
  end
95
- # puts "NEW TREE:"
96
- # root.print_tree(1,nil,lambda {|node,pre| puts "#{pre} #{@@store.ints2names(node.name.map(&:to_i))}:#{node.content.size}"})
103
+ logger.debug "NEW TREE:"
104
+ root.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}:#{node.content.size}"})
97
105
  b = b.next_sibling
98
- # puts "A siblings #{a.right_siblings.map(&:name).map {|n| @@store.ints2names(n.map(&:to_i))}}"
99
- # puts "A next sibling #{@@store.ints2names(a.next_sibling.name.map(&:to_i))}}"
100
- # puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
106
+ logger.debug "A next sibling #{b}}"
107
+ logger.debug "A:#{a.name}, B:#{b.nil? ? nil : b.name}"
101
108
  end # siblings.each
102
109
  if !a.children.empty?
103
- # puts "TRAVERSING DOWN"
110
+ logger.debug "TRAVERSING DOWN"
104
111
  self.extend_nodes(a, closed_rules: closed_rules)
105
112
  end
106
113
  # add node as closed rule if not subsumed by another rule already added
107
114
  rule_frequency = a.content.size
108
115
  rule = a.name
109
116
  if closed_rules[rule_frequency].nil?
110
- # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
117
+ logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
111
118
  closed_rules[rule_frequency] = [rule]
112
119
  else
113
120
  if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
114
- # puts "ADDING NEW CLOSED RULE: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
121
+ logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
115
122
  closed_rules[rule_frequency] << rule
116
123
  else
117
- # puts "RULE SUBSUMED, NOT ADDING: #{@@store.ints2names(rule.map(&:to_i))}:#{rule_frequency}"
124
+ logger.debug "RULE SUBSUMED, NOT ADDING: #{rule}:#{rule_frequency}"
118
125
  end
119
126
  end
120
127
  current_node = current_node.next_sibling
@@ -17,59 +17,78 @@ module Evoc
17
17
  end
18
18
 
19
19
  def sample_transactions
20
- STDERR.puts "Sampling transactions.."
21
20
  # by default we can sample from the whole history
22
21
  sampling_history = Evoc::HistoryStore.base_history
22
+ STDERR.puts "Sampling transactions from a pool of #{sampling_history.size}.."
23
23
  sample = []
24
+
25
+ #################################################################################
26
+ # performing filtering steps on min/max commits size and minimum previous history
27
+ #################################################################################
28
+
29
+ if !self.opts[:recent].nil?
30
+ size = sampling_history.size
31
+ sampling_history = sampling_history[[0,size-self.opts[:recent]].max..-1]
32
+ STDERR.puts " Filtering to the #{self.opts[:recent]} most recent transactions (new pool size: #{sampling_history.size})"
33
+ end
24
34
  # filter out transactions larger than X
35
+ if !self.opts[:minimum_commit_size].nil?
36
+ sampling_history = sampling_history.select {|tx| tx.size >= self.opts[:minimum_commit_size]}
37
+ STDERR.puts " Filtering to txes larger than or equal to #{self.opts[:minimum_commit_size]} (new pool size: #{sampling_history.size})"
38
+ end
25
39
  if !self.opts[:maximum_commit_size].nil?
26
- STDERR.puts "Only sampling txes smaller than #{self.opts[:maximum_commit_size]}"
27
- sampling_history = sampling_history.clone_with_subset(0,sampling_history.size-1,self.opts[:maximum_commit_size])
40
+ sampling_history = sampling_history.select {|tx| tx.size <= self.opts[:maximum_commit_size]}
41
+ STDERR.puts " Filtering to txes smaller than or equal to #{self.opts[:maximum_commit_size]} (new pool size: #{sampling_history.size})"
28
42
  end
29
43
  # only sample transactions that have at least 'minimum_history' previous history
30
44
  if !self.opts[:minimum_history].nil?
31
- STDERR.puts "Only sampling txes with at least #{self.opts[:minimum_history]} previous txes (history)"
32
- if self.opts[:minimum_history] >= sampling_history.size-1
33
- raise ArgumentError, "The history you provided (#{self.opts[:transactions]}), only contains #{sampling_history.size}, not enough to sample with a minimum history set to #{self.opts[:minimum_history]}. Perhaps also #{self.opts[:maximum_commit_size]} must be increased."
45
+ sampling_history = sampling_history.select {|tx| tx.index >= self.opts[:minimum_history]}
46
+ STDERR.puts " Filtering to txes with at least #{self.opts[:minimum_history]} previous txes (new pool size: #{sampling_history.size})"
47
+ end
48
+ filtering_switches = [:recent,:minimum_commit_size,:maximum_commit_size,:minimum_history]
49
+ if filtering_switches.any? {|s| !self.opts[s].nil?}
50
+ if sampling_history.size == 0
51
+ STDERR.puts "WARNING: All transactions were filtered out, unable to sample"
52
+ return []
34
53
  end
35
- sampling_history = sampling_history.clone_with_subset(self.opts[:minimum_history],sampling_history.size-1)
36
54
  end
37
- if self.opts[:recent]
38
- STDERR.puts "Taking the #{self.opts[:sample_size]} most recent transactions, this overrides any other sampling params apart from maximum_commit_size"
39
- txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}
40
- sample = txes_larger_than_one.sort_by {|tx| -tx.index}.take(self.opts[:sample_size]).map(&:id)
41
- else
42
- # group the txes by size
43
- groups = sampling_history.group_by {|tx| tx.size}
44
- # sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
45
- # i.e., random sampling is done first, then the sampled txes are removed from the sampling
46
- tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
47
- tx_sizes_to_sample_from.each do |group_size|
48
- if group_size == '*'
49
- # TODO: > 2 should be generalized to > X
50
- txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}.map(&:id)
51
- sampled_ids = txes_larger_than_one.sample(self.opts[:sample_size])
52
- sample << sampled_ids
53
- STDERR.puts "Sampled #{sampled_ids.size} txes from the whole history"
54
- # remove sampled txes from sampling_history
55
- filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
56
- sampling_history.clear
57
- filtered_hist.each {|tx| sampling_history << tx}
58
- elsif group_size.to_i
59
- # check if there were any txes of this size
60
- if group = groups[group_size.to_i]
61
- if group.size < self.opts[:sample_size]
62
- logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
63
- end
64
- sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
65
- sample << sampled_ids
66
- STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
67
- else
68
- logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
55
+
56
+ if self.opts[:sample_size] > sampling_history.size
57
+ STDERR.puts "WARNING: The sample size is larger than the available transactions"
58
+ end
59
+
60
+ ######################
61
+ # performing sampling
62
+ ######################
63
+
64
+ # group the txes by size
65
+ groups = sampling_history.group_by {|tx| tx.size}
66
+ # sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
67
+ # i.e., random sampling is done first, then the sampled txes are removed from the sampling
68
+ tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
69
+ tx_sizes_to_sample_from.each do |group_size|
70
+ if group_size == '*'
71
+ sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size])
72
+ sample << sampled_ids
73
+ STDERR.puts "Sampled #{sampled_ids.size} txes"
74
+ # remove sampled txes from sampling_history
75
+ filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
76
+ sampling_history.clear
77
+ filtered_hist.each {|tx| sampling_history << tx}
78
+ elsif group_size.to_i
79
+ # check if there were any txes of this size
80
+ if group = groups[group_size.to_i]
81
+ if group.size < self.opts[:sample_size]
82
+ logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
69
83
  end
84
+ sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
85
+ sample << sampled_ids
86
+ STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
70
87
  else
71
- raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
88
+ logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
72
89
  end
90
+ else
91
+ raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
73
92
  end
74
93
  end
75
94
  sample.flatten.uniq
@@ -135,17 +154,17 @@ module Evoc
135
154
  # only specified sizes
136
155
  if random_sizes.empty? & !specified_sizes.empty?
137
156
  sampled_queries = specified_sizes.map {|s| items.sample(s)}
138
- # only random sizes
157
+ # only random sizes
139
158
  elsif !random_sizes.empty? & specified_sizes.empty?
140
159
  sampled_queries = random_sizes.map {|s| items.sample(s)}
141
- # random + specified = randomly sample in range defined by specified
142
- # ex:
143
- # specified = [1,3,10,20]
144
- # tx size = 4
145
- #
146
- # 1. remove X in specified that are larger than or equal to 4
147
- # 2. randomly select X in specified = Y
148
- # 3. randomly select Y in tx
160
+ # random + specified = randomly sample in range defined by specified
161
+ # ex:
162
+ # specified = [1,3,10,20]
163
+ # tx size = 4
164
+ #
165
+ # 1. remove X in specified that are larger than or equal to 4
166
+ # 2. randomly select X in specified = Y
167
+ # 3. randomly select Y in tx
149
168
  elsif !random_sizes.empty? & !specified_sizes.empty?
150
169
  specified_sizes.select! {|s| (s < tx_size) & (s > 0)} #1.
151
170
  if randomly_sampled_size = specified_sizes.sample #2.
@@ -231,6 +250,7 @@ module Evoc
231
250
  factors = [factor_model_size,factor_max_size,factor_model_age,factor_algorithms,factor_measures,factor_permutation,factor_aggregators].compact
232
251
  num_of_scenarios = factors.inject(1) {|product,f| product * f.size}
233
252
  invalid_configuration = 0
253
+ last_error = 'no errors'
234
254
  CSV.foreach(self.opts[:queries], headers: true) do |query|
235
255
  # abort if the failsafe file is present
236
256
  if !self.opts[:fail_safe].nil?
@@ -254,7 +274,6 @@ module Evoc
254
274
  end
255
275
 
256
276
  current_scenario = 1
257
- last_error = 'no errors'
258
277
  # - compact removes nil values (not used factors)
259
278
  # - the splat operator '*' turns the array into parameters for #product
260
279
  # - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
data/lib/evoc/tx_store.rb CHANGED
@@ -95,8 +95,8 @@ module Evoc
95
95
  @txes.last
96
96
  end
97
97
 
98
- def [] index
99
- @txes[index]
98
+ def [] *indexes
99
+ @txes[*indexes]
100
100
  end
101
101
 
102
102
  def to_s
data/lib/evoc/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Evoc
2
- VERSION = "3.8.1"
2
+ VERSION = "3.9.0"
3
3
  end
@@ -23,7 +23,8 @@ module EvocCLI
23
23
  method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
24
24
  method_option :minimum_history, :aliases => '-m', type: :numeric, desc: "Filter out transactions which has less previous history than this"
25
25
  method_option :maximum_commit_size, type: :numeric, desc: "Filter out transactions which are larger than this before sampling"
26
- method_option :recent, type: :boolean, desc: "If transactions should be the most recent"
26
+ method_option :minimum_commit_size, type: :numeric, default: 2, desc: "Filter out transactions which are smaller than this before sampling"
27
+ method_option :recent, type: :numeric, desc: "Sample in the X most recent transactions"
27
28
  method_option :after, :aliases => '-a', :desc => "Only include commits after this date"
28
29
  method_option :before, :aliases => '-b', :desc => "Only include commits before this date"
29
30
  desc "sample_transactions [OPTIONS]","Make a sample of transactions (from JSON format)"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: evoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.8.1
4
+ version: 3.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Rolfsnes
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-15 00:00:00.000000000 Z
11
+ date: 2017-03-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler