evoc 3.8.1 → 3.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/evoc/algorithms/closed_rules.rb +56 -49
- data/lib/evoc/experiment.rb +69 -50
- data/lib/evoc/tx_store.rb +2 -2
- data/lib/evoc/version.rb +1 -1
- data/lib/evoc_cli/experiment.rb +2 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88a68050e4c0549ae496b8aa53cfdf00c452a596
|
4
|
+
data.tar.gz: 9f51e810f2402c4238f0fd0df127de5df813ea3b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d8cb5d438e8299ffb5ad7a8eabf0cd99c3a3dec8951a32e1e7eea476b5b471a7fa02afd04320d56b5d539c20315bfaaf1422436b545dbbbcc32125ee70d0d2a
|
7
|
+
data.tar.gz: bbaee8dcd9dee18adeb8bb070e7aaf33b19225d237026546d6924684518006f400e527d3758f3f9a8a23cb9bc929a6d56a3d371ee9ac46ec2125f3f86cc61419
|
data/lib/evoc/algorithms/closed_rules.rb
CHANGED
@@ -1,51 +1,59 @@
|
|
1
1
|
module Evoc
|
2
2
|
class ClosedRules
|
3
|
+
extend Logging
|
4
|
+
|
3
5
|
def self.closed_rules(tx_store:,query:)
|
4
6
|
# @@store = tx_store
|
5
7
|
# create initial trees, one tree per consequent
|
6
8
|
tree = self.initialize_tree(tx_store,query)
|
7
|
-
|
8
|
-
|
9
|
+
logger.debug "INIT TREE:"
|
10
|
+
tree.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}"})
|
9
11
|
closed_rules = Evoc::RuleStore.new(query: query)
|
10
|
-
tree.children.each do |consequent|
|
11
|
-
self.extend_nodes(
|
12
|
+
# tree.children.each do |consequent|
|
13
|
+
self.extend_nodes(tree).each do |frequency, closed_sets|
|
12
14
|
closed_sets.each do |closed_set|
|
13
15
|
antecedent = closed_set - consequent.name
|
14
16
|
closed_rules << Evoc::Rule.new(lhs: antecedent.map(&:to_i), rhs: consequent.name.map(&:to_i),tx_store: tx_store, m_support: frequency.to_r/tx_store.size)
|
15
17
|
end
|
16
18
|
end
|
17
|
-
end
|
19
|
+
# end
|
18
20
|
return closed_rules
|
19
21
|
end
|
20
22
|
|
21
23
|
private
|
22
24
|
def self.initialize_tree(tx_store, query)
|
25
|
+
rules = Evoc::Algorithm.co_change(tx_store: tx_store, query: query)
|
23
26
|
tree = Tree::TreeNode.new([])
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
tx = tx_store.get_tx(id:tx_id,id_type: :index)
|
29
|
-
antecedent = (query & tx.items)
|
30
|
-
consequents = (tx.items - antecedent)
|
31
|
-
if consequents.size != 0
|
32
|
-
consequents.each do |consequent|
|
33
|
-
consequent_key = [consequent.to_s]
|
34
|
-
if tree[consequent_key].nil?
|
35
|
-
# initialize candidates
|
36
|
-
tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
|
37
|
-
end
|
38
|
-
txes_consequent = tree[consequent_key].content
|
39
|
-
antecedent.each do |item|
|
40
|
-
union = [item.to_s,consequent.to_s]
|
41
|
-
if tree[consequent_key][union].nil?
|
42
|
-
txes_union = tx_store.transactions_of(item) & txes_consequent
|
43
|
-
tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
27
|
+
rules.each do |rule|
|
28
|
+
txes_union = tx_store.transactions_of(rule.lhs.first) & tx_store.transactions_of(rule.rhs.first)
|
29
|
+
union = [rule.lhs.first.to_s,rule.rhs.first.to_s]
|
30
|
+
tree << Tree::TreeNode.new(union,txes_union)
|
48
31
|
end
|
32
|
+
# # find all items that changed with something in the query
|
33
|
+
# query_changed_in = tx_store.transactions_of_list(query)
|
34
|
+
# # store all items from the query that have changed with each consequent
|
35
|
+
# query_changed_in.each do |tx_id|
|
36
|
+
# tx = tx_store.get_tx(id:tx_id,id_type: :index)
|
37
|
+
# antecedent = (query & tx.items)
|
38
|
+
# consequents = (tx.items - antecedent)
|
39
|
+
# if consequents.size != 0
|
40
|
+
# consequents.each do |consequent|
|
41
|
+
# consequent_key = [consequent.to_s]
|
42
|
+
# if tree[consequent_key].nil?
|
43
|
+
# # initialize candidates
|
44
|
+
# tree << Tree::TreeNode.new([consequent.to_s],tx_store.transactions_of(consequent))
|
45
|
+
# end
|
46
|
+
# txes_consequent = tree[consequent_key].content
|
47
|
+
# antecedent.each do |item|
|
48
|
+
# union = [item.to_s,consequent.to_s]
|
49
|
+
# if tree[consequent_key][union].nil?
|
50
|
+
# txes_union = tx_store.transactions_of(item) & txes_consequent
|
51
|
+
# tree[consequent_key] << Tree::TreeNode.new(union,txes_union)
|
52
|
+
# end
|
53
|
+
# end
|
54
|
+
# end
|
55
|
+
# end
|
56
|
+
# end
|
49
57
|
return(tree)
|
50
58
|
end
|
51
59
|
|
@@ -55,7 +63,7 @@ module Evoc
|
|
55
63
|
a = current_node
|
56
64
|
b = a.next_sibling
|
57
65
|
while(!b.nil?) do
|
58
|
-
|
66
|
+
logger.debug "Checking #{a.name}:{#{a.content}} against #{b.name}:{#{b.content}}"
|
59
67
|
ab = a.name | b.name
|
60
68
|
a_txes = a.content
|
61
69
|
b_txes = b.content
|
@@ -67,54 +75,53 @@ module Evoc
|
|
67
75
|
if ab_txes.size > 0
|
68
76
|
case self.compare(a_txes,b_txes)
|
69
77
|
when 'EQUAL'
|
70
|
-
|
71
|
-
|
72
|
-
|
78
|
+
logger.debug " EQUAL"
|
79
|
+
logger.debug " removing #{b.name}"
|
80
|
+
logger.debug " renaming #{a.name} to #{ab}"
|
73
81
|
temp = b.previous_sibling
|
74
82
|
root.remove!(b)
|
75
83
|
b = temp
|
76
84
|
a.each {|n| n.rename(ab | n.name)}
|
77
85
|
when 'A_IN_B'
|
78
|
-
|
79
|
-
|
86
|
+
logger.debug " A in B"
|
87
|
+
logger.debug " renaming #{a.name} to #{ab}"
|
80
88
|
a.each {|n| n.rename(ab | n.name)}
|
81
89
|
when 'B_IN_A'
|
82
|
-
|
83
|
-
|
84
|
-
|
90
|
+
logger.debug " B in A"
|
91
|
+
logger.debug " removing #{b.name}"
|
92
|
+
logger.debug " adding child #{ab} to #{a.name}"
|
85
93
|
temp = b.previous_sibling
|
86
94
|
root.remove!(b)
|
87
95
|
b = temp
|
88
96
|
a << Tree::TreeNode.new(ab,ab_txes)
|
89
97
|
when 'NOT_EQUAL'
|
90
|
-
|
91
|
-
|
98
|
+
logger.debug " NOT EQUAL"
|
99
|
+
logger.debug " adding child #{ab} to #{a.name}"
|
92
100
|
a << Tree::TreeNode.new(ab,ab_txes)
|
93
101
|
end
|
94
102
|
end
|
95
|
-
|
96
|
-
|
103
|
+
logger.debug "NEW TREE:"
|
104
|
+
root.print_tree(1,nil,lambda {|node,pre| logger.debug "#{pre} #{node.name}:#{node.content.size}"})
|
97
105
|
b = b.next_sibling
|
98
|
-
|
99
|
-
|
100
|
-
# puts "A:#{@@store.ints2names(a.name.map(&:to_i))}, B:#{b.nil? ? nil : @@store.ints2names(b.name.map(&:to_i))}"
|
106
|
+
logger.debug "A next sibling #{b}}"
|
107
|
+
logger.debug "A:#{a.name}, B:#{b.nil? ? nil : b.name}"
|
101
108
|
end # siblings.each
|
102
109
|
if !a.children.empty?
|
103
|
-
|
110
|
+
logger.debug "TRAVERSING DOWN"
|
104
111
|
self.extend_nodes(a, closed_rules: closed_rules)
|
105
112
|
end
|
106
113
|
# add node as closed rule if not subsumed by another rule already added
|
107
114
|
rule_frequency = a.content.size
|
108
115
|
rule = a.name
|
109
116
|
if closed_rules[rule_frequency].nil?
|
110
|
-
|
117
|
+
logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
|
111
118
|
closed_rules[rule_frequency] = [rule]
|
112
119
|
else
|
113
120
|
if !closed_rules[rule_frequency].any? {|closed| (rule - closed).empty? }
|
114
|
-
|
121
|
+
logger.debug "ADDING NEW CLOSED RULE: #{rule}:#{rule_frequency}"
|
115
122
|
closed_rules[rule_frequency] << rule
|
116
123
|
else
|
117
|
-
|
124
|
+
logger.debug "RULE SUBSUMED, NOT ADDING: #{rule}:#{rule_frequency}"
|
118
125
|
end
|
119
126
|
end
|
120
127
|
current_node = current_node.next_sibling
|
data/lib/evoc/experiment.rb
CHANGED
@@ -17,59 +17,78 @@ module Evoc
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def sample_transactions
|
20
|
-
STDERR.puts "Sampling transactions.."
|
21
20
|
# by default we can sample from the whole history
|
22
21
|
sampling_history = Evoc::HistoryStore.base_history
|
22
|
+
STDERR.puts "Sampling transactions from a pool of #{sampling_history.size}.."
|
23
23
|
sample = []
|
24
|
+
|
25
|
+
#################################################################################
|
26
|
+
# performing filtering steps on min/max commits size and minimum previous history
|
27
|
+
#################################################################################
|
28
|
+
|
29
|
+
if !self.opts[:recent].nil?
|
30
|
+
size = sampling_history.size
|
31
|
+
sampling_history = sampling_history[[0,size-self.opts[:recent]].max..-1]
|
32
|
+
STDERR.puts " Filtering to the #{self.opts[:recent]} most recent transactions (new pool size: #{sampling_history.size})"
|
33
|
+
end
|
24
34
|
# filter out transactions larger than X
|
35
|
+
if !self.opts[:minimum_commit_size].nil?
|
36
|
+
sampling_history = sampling_history.select {|tx| tx.size >= self.opts[:minimum_commit_size]}
|
37
|
+
STDERR.puts " Filtering to txes larger than or equal to #{self.opts[:minimum_commit_size]} (new pool size: #{sampling_history.size})"
|
38
|
+
end
|
25
39
|
if !self.opts[:maximum_commit_size].nil?
|
26
|
-
|
27
|
-
|
40
|
+
sampling_history = sampling_history.select {|tx| tx.size <= self.opts[:maximum_commit_size]}
|
41
|
+
STDERR.puts " Filtering to txes smaller than or equal to #{self.opts[:maximum_commit_size]} (new pool size: #{sampling_history.size})"
|
28
42
|
end
|
29
43
|
# only sample transactions that have at least 'minimum_history' previous history
|
30
44
|
if !self.opts[:minimum_history].nil?
|
31
|
-
|
32
|
-
|
33
|
-
|
45
|
+
sampling_history = sampling_history.select {|tx| tx.index >= self.opts[:minimum_history]}
|
46
|
+
STDERR.puts " Filtering to txes with at least #{self.opts[:minimum_history]} previous txes (new pool size: #{sampling_history.size})"
|
47
|
+
end
|
48
|
+
filtering_switches = [:recent,:minimum_commit_size,:maximum_commit_size,:minimum_history]
|
49
|
+
if filtering_switches.any? {|s| !self.opts[s].nil?}
|
50
|
+
if sampling_history.size == 0
|
51
|
+
STDERR.puts "WARNING: All transactions were filtered out, unable to sample"
|
52
|
+
return []
|
34
53
|
end
|
35
|
-
sampling_history = sampling_history.clone_with_subset(self.opts[:minimum_history],sampling_history.size-1)
|
36
54
|
end
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
sample << sampled_ids
|
66
|
-
STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
|
67
|
-
else
|
68
|
-
logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
|
55
|
+
|
56
|
+
if self.opts[:sample_size] > sampling_history.size
|
57
|
+
STDERR.puts "WARNING: The sample size is larger than the available transactions"
|
58
|
+
end
|
59
|
+
|
60
|
+
######################
|
61
|
+
# performing sampling
|
62
|
+
######################
|
63
|
+
|
64
|
+
# group the txes by size
|
65
|
+
groups = sampling_history.group_by {|tx| tx.size}
|
66
|
+
# sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
|
67
|
+
# i.e., random sampling is done first, then the sampled txes are removed from the sampling
|
68
|
+
tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
|
69
|
+
tx_sizes_to_sample_from.each do |group_size|
|
70
|
+
if group_size == '*'
|
71
|
+
sampled_ids = sampling_history.map(&:id).sample(self.opts[:sample_size])
|
72
|
+
sample << sampled_ids
|
73
|
+
STDERR.puts "Sampled #{sampled_ids.size} txes"
|
74
|
+
# remove sampled txes from sampling_history
|
75
|
+
filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
|
76
|
+
sampling_history.clear
|
77
|
+
filtered_hist.each {|tx| sampling_history << tx}
|
78
|
+
elsif group_size.to_i
|
79
|
+
# check if there were any txes of this size
|
80
|
+
if group = groups[group_size.to_i]
|
81
|
+
if group.size < self.opts[:sample_size]
|
82
|
+
logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
|
69
83
|
end
|
84
|
+
sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
|
85
|
+
sample << sampled_ids
|
86
|
+
STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
|
70
87
|
else
|
71
|
-
|
88
|
+
logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
|
72
89
|
end
|
90
|
+
else
|
91
|
+
raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
|
73
92
|
end
|
74
93
|
end
|
75
94
|
sample.flatten.uniq
|
@@ -135,17 +154,17 @@ module Evoc
|
|
135
154
|
# only specified sizes
|
136
155
|
if random_sizes.empty? & !specified_sizes.empty?
|
137
156
|
sampled_queries = specified_sizes.map {|s| items.sample(s)}
|
138
|
-
|
157
|
+
# only random sizes
|
139
158
|
elsif !random_sizes.empty? & specified_sizes.empty?
|
140
159
|
sampled_queries = random_sizes.map {|s| items.sample(s)}
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
160
|
+
# random + specified = randomly sample in range defined by specified
|
161
|
+
# ex:
|
162
|
+
# specified = [1,3,10,20]
|
163
|
+
# tx size = 4
|
164
|
+
#
|
165
|
+
# 1. remove X in specified that are larger than or equal to 4
|
166
|
+
# 2. randomly select X in specified = Y
|
167
|
+
# 3. randomly select Y in tx
|
149
168
|
elsif !random_sizes.empty? & !specified_sizes.empty?
|
150
169
|
specified_sizes.select! {|s| (s < tx_size) & (s > 0)} #1.
|
151
170
|
if randomly_sampled_size = specified_sizes.sample #2.
|
@@ -231,6 +250,7 @@ module Evoc
|
|
231
250
|
factors = [factor_model_size,factor_max_size,factor_model_age,factor_algorithms,factor_measures,factor_permutation,factor_aggregators].compact
|
232
251
|
num_of_scenarios = factors.inject(1) {|product,f| product * f.size}
|
233
252
|
invalid_configuration = 0
|
253
|
+
last_error = 'no errors'
|
234
254
|
CSV.foreach(self.opts[:queries], headers: true) do |query|
|
235
255
|
# abort if the failsafe file is present
|
236
256
|
if !self.opts[:fail_safe].nil?
|
@@ -254,7 +274,6 @@ module Evoc
|
|
254
274
|
end
|
255
275
|
|
256
276
|
current_scenario = 1
|
257
|
-
last_error = 'no errors'
|
258
277
|
# - compact removes nil values (not used factors)
|
259
278
|
# - the splat operator '*' turns the array into parameters for #product
|
260
279
|
# - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
|
data/lib/evoc/tx_store.rb
CHANGED
data/lib/evoc/version.rb
CHANGED
data/lib/evoc_cli/experiment.rb
CHANGED
@@ -23,7 +23,8 @@ module EvocCLI
|
|
23
23
|
method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
|
24
24
|
method_option :minimum_history, :aliases => '-m', type: :numeric, desc: "Filter out transactions which has less previous history than this"
|
25
25
|
method_option :maximum_commit_size, type: :numeric, desc: "Filter out transactions which are larger than this before sampling"
|
26
|
-
method_option :
|
26
|
+
method_option :minimum_commit_size, type: :numeric, default: 2, desc: "Filter out transactions which are smaller than this before sampling"
|
27
|
+
method_option :recent, type: :numeric, desc: "Sample in the X most recent transactions"
|
27
28
|
method_option :after, :aliases => '-a', :desc => "Only include commits after this date"
|
28
29
|
method_option :before, :aliases => '-b', :desc => "Only include commits before this date"
|
29
30
|
desc "sample_transactions [OPTIONS]","Make a sample of transactions (from JSON format)"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: evoc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.8.1
|
4
|
+
version: 3.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Rolfsnes
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-03-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|