evoc 3.8.1 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/evoc/algorithms/closed_rules.rb +56 -49
- data/lib/evoc/experiment.rb +69 -50
- data/lib/evoc/tx_store.rb +2 -2
- data/lib/evoc/version.rb +1 -1
- data/lib/evoc_cli/experiment.rb +2 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88a68050e4c0549ae496b8aa53cfdf00c452a596
|
4
|
+
data.tar.gz: 9f51e810f2402c4238f0fd0df127de5df813ea3b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d8cb5d438e8299ffb5ad7a8eabf0cd99c3a3dec8951a32e1e7eea476b5b471a7fa02afd04320d56b5d539c20315bfaaf1422436b545dbbbcc32125ee70d0d2a
|
7
|
+
data.tar.gz: bbaee8dcd9dee18adeb8bb070e7aaf33b19225d237026546d6924684518006f400e527d3758f3f9a8a23cb9bc929a6d56a3d371ee9ac46ec2125f3f86cc61419
|
@@ -1,51 +1,59 @@
|
|
1
1
|
module Evoc
|
2
2
|
class ClosedRules
|
3
|
+
extend Logging
|
4
|
+
|
3
5
|
def self.closed_rules(tx_store:,query:)
|
4
6
|
# @@store = tx_store
|
5
7
|
# create initial trees, one tree per consequent
|
6
8
|
tree = self.initialize_tree(tx_store,query)
|
7
|
-
|
8
|
-
|
9
|
+
logger.debug "INIT TREE:"
|
10
|
+
tree.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}"})
|
9
11
|
closed_rules = Evoc::RuleStore.new(query: query)
|
10
|
-
tree.children.each do |consequent|
|
11
|
-
self.extend_nodes(
|
12
|
+
# tree.children.each do |consequent|
|
13
|
+
self.extend_nodes(tree).each do |frequency, closed_sets|
|
12
14
|
closed_sets.each do |closed_set|
|
13
15
|
antecedent = closed_set - consequent.name
|
14
16
|
closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
|
15
17
|
end
|
16
18
|
end
|
17
|
-
end
|
19
|
+
# end
|
18
20
|
return closed_rules
|
19
21
|
end
|
20
22
|
|
21
23
|
private
|
22
24
|
def self.initialize_tree(tx_store, query)
|
25
|
+
rules = Evoc::Algorithm.co_change(tx_store: tx_store, query: query)
|
23
26
|
tree = Tree::TreeNode.new([])
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
tx = tx_store.get_tx(id:tx_id,id_type: :index)
|
29
|
-
antecedent = (query & tx.items)
|
30
|
-
consequents = (tx.items - antecedent)
|
31
|
-
if consequents.size != 0
|
32
|
-
consequents.each do |consequent|
|
33
|
-
consequent_key = [consequent.to_s]
|
34
|
-
if tree[consequent_key].nil?
|
35
|
-
# initialize candidates
|
36
|
-
tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
|
37
|
-
end
|
38
|
-
txes_consequent = tree[consequent_key].content
|
39
|
-
antecedent.each do |item|
|
40
|
-
union = [item.to_s,consequent.to_s]
|
41
|
-
if tree[consequent_key][union].nil?
|
42
|
-
txes_union = tx_store.transactions_of(item) & txes_consequent
|
43
|
-
tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
27
|
+
rules.each do |rule|
|
28
|
+
txes_union = tx_store.transactions_of(rule.lhs.first) & tx_store.transactions_of(rule.rhs.first)
|
29
|
+
union = [rule.lhs.first.to_s,rule.rhs.first.to_s]
|
30
|
+
tree << Tree::TreeNode.new(union,txes_union)
|
48
31
|
end
|
32
|
+
# # find all items that changed with something in the query
|
33
|
+
# query_changed_in = tx_store.transactions_of_list(query)
|
34
|
+
# # store all items from the query that have changed with each consequent
|
35
|
+
# query_changed_in.each do |tx_id|
|
36
|
+
# tx = tx_store.get_tx(id:tx_id,id_type: :index)
|
37
|
+
# antecedent = (query & tx.items)
|
38
|
+
# consequents = (tx.items - antecedent)
|
39
|
+
# if consequents.size != 0
|
40
|
+
# consequents.each do |consequent|
|
41
|
+
# consequent_key = [consequent.to_s]
|
42
|
+
# if tree[consequent_key].nil?
|
43
|
+
# # initialize candidates
|
44
|
+
# tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
|
45
|
+
# end
|
46
|
+
# txes_consequent = tree[consequent_key].content
|
47
|
+
# antecedent.each do |item|
|
48
|
+
# union = [item.to_s,consequent.to_s]
|
49
|
+
# if tree[consequent_key][union].nil?
|
50
|
+
# txes_union = tx_store.transactions_of(item) & txes_consequent
|
51
|
+
# tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
|
52
|
+
# end
|
53
|
+
# end
|
54
|
+
# end
|
55
|
+
# end
|
56
|
+
# end
|
49
57
|
return(tree)
|
50
58
|
end
|
51
59
|
|
@@ -55,7 +63,7 @@ module Evoc
|
|
55
63
|
a = current_node
|
56
64
|
b = a.next_sibling
|
57
65
|
while(!b.nil?) do
|
58
|
-
|
66
|
+
logger.debug "Checking #{a.name}:{#{a.content}} against #{b.name}:{#{b.content}}"
|
59
67
|
ab = a.name | b.name
|
60
68
|
a_txes = a.content
|
61
69
|
b_txes = b.content
|
@@ -67,54 +75,53 @@ module Evoc
|
|
67
75
|
if ab_txes.size > 0
|
68
76
|
case self.compare(a_txes,b_txes)
|
69
77
|
when 'EQUAL'
|
70
|
-
|
71
|
-
|
72
|
-
|
78
|
+
logger.debug " EQUAL"
|
79
|
+
logger.debug " removing #{b.name}"
|
80
|
+
logger.debug " renaming #{a.name} to #{ab}"
|
73
81
|
temp = b.previous_sibling
|
74
82
|
root.remove!(b)
|
75
83
|
b = temp
|
76
84
|
a.each {|n| n.rename(ab | n.name)}
|
77
85
|
when 'A_IN_B'
|
78
|
-
|
79
|
-
|
86
|
+
logger.debug " A in B"
|
87
|
+
logger.debug " renaming #{a.name} to #{ab}"
|
80
88
|
a.each {|n| n.rename(ab | n.name)}
|
81
89
|
when 'B_IN_A'
|
82
|
-
|
83
|
-
|
84
|
-
|
90
|
+
logger.debug " B in A"
|
91
|
+
logger.debug " removing #{b.name}"
|
92
|
+
logger.debug " adding child #{ab} to #{a.name}"
|
85
93
|
temp = b.previous_sibling
|
86
94
|
root.remove!(b)
|
87
95
|
b = temp
|
88
96
|
a << Tree::TreeNode.new(ab,ab_txes)
|
89
97
|
when 'NOT_EQUAL'
|
90
|
-
|
91
|
-
|
98
|
+
logger.debug " NOT EQUAL"
|
99
|
+
logger.debug " adding child #{ab} to #{a.name}"
|
92
100
|
a << Tree::TreeNode.new(ab,ab_txes)
|
93
101
|
end
|
94
102
|
end
|
95
|
-
|
96
|
-
|
103
|
+
logger.debug "NEW TREE:"
|
104
|
+
root.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}:#{node.content.size}"})
|
97
105
|
b = b.next_sibling
|
98
|
-
|
99
|
-
|
100
|
-
# puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
|
106
|
+
logger.debug "A next sibling #{b}}"
|
107
|
+
logger.debug "A:#{a.name}, B:#{b.nil? ? nil : b.name}"
|
101
108
|
end # siblings.each
|
102
109
|
if !a.children.empty?
|
103
|
-
|
110
|
+
logger.debug "TRAVERSING DOWN"
|
104
111
|
self.extend_nodes(a, closed_rules: closed_rules)
|
105
112
|
end
|
106
113
|
# add node as closed rule if not subsumed by another rule already added
|
107
114
|
rule_frequency = a.content.size
|
108
115
|
rule = a.name
|
109
116
|
if closed_rules[rule_frequency].nil?
|
110
|
-
|
117
|
+
logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
|
111
118
|
closed_rules[rule_frequency] = [rule]
|
112
119
|
else
|
113
120
|
if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
|
114
|
-
|
121
|
+
logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
|
115
122
|
closed_rules[rule_frequency] << rule
|
116
123
|
else
|
117
|
-
|
124
|
+
logger.debug "RULE SUBSUMED, NOT ADDING: #{rule}:#{rule_frequency}"
|
118
125
|
end
|
119
126
|
end
|
120
127
|
current_node = current_node.next_sibling
|
data/lib/evoc/experiment.rb
CHANGED
@@ -17,59 +17,78 @@ module Evoc
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def sample_transactions
|
20
|
-
STDERR.puts "Sampling transactions.."
|
21
20
|
# by default we can sample from the whole history
|
22
21
|
sampling_history = Evoc::HistoryStore.base_history
|
22
|
+
STDERR.puts "Sampling transactions from a pool of #{sampling_history.size}.."
|
23
23
|
sample = []
|
24
|
+
|
25
|
+
#################################################################################
|
26
|
+
# performing filtering steps on min/max commits size and minimum previous history
|
27
|
+
#################################################################################
|
28
|
+
|
29
|
+
if !self.opts[:recent].nil?
|
30
|
+
size = sampling_history.size
|
31
|
+
sampling_history = sampling_history[[0,size-self.opts[:recent]].max..-1]
|
32
|
+
STDERR.puts " Filtering to the #{self.opts[:recent]} most recent transactions (new pool size: #{sampling_history.size})"
|
33
|
+
end
|
24
34
|
# filter out transactions larger than X
|
35
|
+
if !self.opts[:minimum_commit_size].nil?
|
36
|
+
sampling_history = sampling_history.select {|tx| tx.size >= self.opts[:minimum_commit_size]}
|
37
|
+
STDERR.puts " Filtering to txes larger than or equal to #{self.opts[:minimum_commit_size]} (new pool size: #{sampling_history.size})"
|
38
|
+
end
|
25
39
|
if !self.opts[:maximum_commit_size].nil?
|
26
|
-
|
27
|
-
|
40
|
+
sampling_history = sampling_history.select {|tx| tx.size <= self.opts[:maximum_commit_size]}
|
41
|
+
STDERR.puts " Filtering to txes smaller than or equal to #{self.opts[:maximum_commit_size]} (new pool size: #{sampling_history.size})"
|
28
42
|
end
|
29
43
|
# only sample transactions that have at least 'minimum_history' previous history
|
30
44
|
if !self.opts[:minimum_history].nil?
|
31
|
-
|
32
|
-
|
33
|
-
|
45
|
+
sampling_history = sampling_history.select {|tx| tx.index >= self.opts[:minimum_history]}
|
46
|
+
STDERR.puts " Filtering to txes with at least #{self.opts[:minimum_history]} previous txes (new pool size: #{sampling_history.size})"
|
47
|
+
end
|
48
|
+
filtering_switches = [:recent,:minimum_commit_size,:maximum_commit_size,:minimum_history]
|
49
|
+
if filtering_switches.any? {|s| !self.opts[s].nil?}
|
50
|
+
if sampling_history.size == 0
|
51
|
+
STDERR.puts "WARNING: All transactions were filtered out, unable to sample"
|
52
|
+
return []
|
34
53
|
end
|
35
|
-
sampling_history = sampling_history.clone_with_subset(self.opts[:minimum_history],sampling_history.size-1)
|
36
54
|
end
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
sample << sampled_ids
|
66
|
-
STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
|
67
|
-
else
|
68
|
-
logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
|
55
|
+
|
56
|
+
if self.opts[:sample_size] > sampling_history.size
|
57
|
+
STDERR.puts "WARNING: The sample size is larger than the available transactions"
|
58
|
+
end
|
59
|
+
|
60
|
+
######################
|
61
|
+
# performing sampling
|
62
|
+
######################
|
63
|
+
|
64
|
+
# group the txes by size
|
65
|
+
groups = sampling_history.group_by {|tx| tx.size}
|
66
|
+
# sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
|
67
|
+
# i.e., random sampling is done first, then the sampled txes are removed from the sampling
|
68
|
+
tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
|
69
|
+
tx_sizes_to_sample_from.each do |group_size|
|
70
|
+
if group_size == '*'
|
71
|
+
sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size])
|
72
|
+
sample << sampled_ids
|
73
|
+
STDERR.puts "Sampled #{sampled_ids.size} txes"
|
74
|
+
# remove sampled txes from sampling_history
|
75
|
+
filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
|
76
|
+
sampling_history.clear
|
77
|
+
filtered_hist.each {|tx| sampling_history << tx}
|
78
|
+
elsif group_size.to_i
|
79
|
+
# check if there were any txes of this size
|
80
|
+
if group = groups[group_size.to_i]
|
81
|
+
if group.size < self.opts[:sample_size]
|
82
|
+
logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
|
69
83
|
end
|
84
|
+
sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
|
85
|
+
sample << sampled_ids
|
86
|
+
STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
|
70
87
|
else
|
71
|
-
|
88
|
+
logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
|
72
89
|
end
|
90
|
+
else
|
91
|
+
raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
|
73
92
|
end
|
74
93
|
end
|
75
94
|
sample.flatten.uniq
|
@@ -135,17 +154,17 @@ module Evoc
|
|
135
154
|
# only specified sizes
|
136
155
|
if random_sizes.empty? & !specified_sizes.empty?
|
137
156
|
sampled_queries = specified_sizes.map {|s| items.sample(s)}
|
138
|
-
|
157
|
+
# only random sizes
|
139
158
|
elsif !random_sizes.empty? & specified_sizes.empty?
|
140
159
|
sampled_queries = random_sizes.map {|s| items.sample(s)}
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
160
|
+
# random + specified = randomly sample in range defined by specified
|
161
|
+
# ex:
|
162
|
+
# specified = [1,3,10,20]
|
163
|
+
# tx size = 4
|
164
|
+
#
|
165
|
+
# 1. remove X in specified that are larger than or equal to 4
|
166
|
+
# 2. randomly select X in specified = Y
|
167
|
+
# 3. randomly select Y in tx
|
149
168
|
elsif !random_sizes.empty? & !specified_sizes.empty?
|
150
169
|
specified_sizes.select! {|s| (s < tx_size) & (s > 0)} #1.
|
151
170
|
if randomly_sampled_size = specified_sizes.sample #2.
|
@@ -231,6 +250,7 @@ module Evoc
|
|
231
250
|
factors = [factor_model_size,factor_max_size,factor_model_age,factor_algorithms,factor_measures,factor_permutation,factor_aggregators].compact
|
232
251
|
num_of_scenarios = factors.inject(1) {|product,f| product * f.size}
|
233
252
|
invalid_configuration = 0
|
253
|
+
last_error = 'no errors'
|
234
254
|
CSV.foreach(self.opts[:queries], headers: true) do |query|
|
235
255
|
# abort if the failsafe file is present
|
236
256
|
if !self.opts[:fail_safe].nil?
|
@@ -254,7 +274,6 @@ module Evoc
|
|
254
274
|
end
|
255
275
|
|
256
276
|
current_scenario = 1
|
257
|
-
last_error = 'no errors'
|
258
277
|
# - compact removes nil values (not used factors)
|
259
278
|
# - the splat operator '*' turns the array into parameters for #product
|
260
279
|
# - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
|
data/lib/evoc/tx_store.rb
CHANGED
data/lib/evoc/version.rb
CHANGED
data/lib/evoc_cli/experiment.rb
CHANGED
@@ -23,7 +23,8 @@ module EvocCLI
|
|
23
23
|
method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
|
24
24
|
method_option :minimum_history, :aliases => '-m', type: :numeric, desc: "Filter out transactions which has less previous history than this"
|
25
25
|
method_option :maximum_commit_size, type: :numeric, desc: "Filter out transactions which are larger than this before sampling"
|
26
|
-
method_option :
|
26
|
+
method_option :minimum_commit_size, type: :numeric, default: 2, desc: "Filter out transactions which are smaller than this before sampling"
|
27
|
+
method_option :recent, type: :numeric, desc: "Sample in the X most recent transactions"
|
27
28
|
method_option :after, :aliases => '-a', :desc => "Only include commits after this date"
|
28
29
|
method_option :before, :aliases => '-b', :desc => "Only include commits before this date"
|
29
30
|
desc "sample_transactions [OPTIONS]","Make a sample of transactions (from JSON format)"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: evoc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Rolfsnes
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-03-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|