evoc 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/Makefile +4 -0
- data/README.md +61 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/evoc +3 -0
- data/bin/setup +7 -0
- data/evoc.gemspec +30 -0
- data/lib/evoc/algorithm.rb +147 -0
- data/lib/evoc/algorithms/top_k.rb +86 -0
- data/lib/evoc/analyze.rb +395 -0
- data/lib/evoc/array.rb +43 -0
- data/lib/evoc/evaluate.rb +109 -0
- data/lib/evoc/exceptions/aggregation_error.rb +6 -0
- data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
- data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
- data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_result.rb +6 -0
- data/lib/evoc/exceptions/non_finite.rb +8 -0
- data/lib/evoc/exceptions/non_numeric.rb +8 -0
- data/lib/evoc/exceptions/not_a_query.rb +6 -0
- data/lib/evoc/exceptions/not_a_result.rb +6 -0
- data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
- data/lib/evoc/exceptions/not_initialized.rb +6 -0
- data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
- data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
- data/lib/evoc/experiment.rb +239 -0
- data/lib/evoc/hash.rb +56 -0
- data/lib/evoc/history_store.rb +53 -0
- data/lib/evoc/hyper_rule.rb +53 -0
- data/lib/evoc/interestingness_measure.rb +77 -0
- data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
- data/lib/evoc/interestingness_measures.rb +882 -0
- data/lib/evoc/logger.rb +34 -0
- data/lib/evoc/memory_profiler.rb +43 -0
- data/lib/evoc/recommendation_cache.rb +152 -0
- data/lib/evoc/rule.rb +32 -0
- data/lib/evoc/rule_store.rb +340 -0
- data/lib/evoc/scenario.rb +303 -0
- data/lib/evoc/svd.rb +124 -0
- data/lib/evoc/tx.rb +34 -0
- data/lib/evoc/tx_store.rb +379 -0
- data/lib/evoc/version.rb +3 -0
- data/lib/evoc.rb +4 -0
- data/lib/evoc_cli/analyze.rb +198 -0
- data/lib/evoc_cli/cli_helper.rb +1 -0
- data/lib/evoc_cli/experiment.rb +78 -0
- data/lib/evoc_cli/info.rb +22 -0
- data/lib/evoc_cli/main.rb +29 -0
- data/lib/evoc_cli/util.rb +36 -0
- data/lib/evoc_helper.rb +40 -0
- data/mem_profiler/Gemfile.lock +39 -0
- data/mem_profiler/README.md +126 -0
- data/mem_profiler/createdb.rb +4 -0
- data/mem_profiler/db.rb +82 -0
- data/mem_profiler/gemfile +6 -0
- data/mem_profiler/gencsv.rb +64 -0
- data/mem_profiler/genimport.sh +8 -0
- data/mem_profiler/graph.rb +91 -0
- metadata +251 -0
data/lib/evoc/experiment.rb
ADDED
@@ -0,0 +1,239 @@
+module Evoc
+  class Experiment
+    include Logging
+    attr_accessor :opts
+
+    @@logger_level = :error
+
+    def initialize(opts = Hash.new)
+      self.opts = opts
+      # Set logger level
+      Logging.set_level(self.opts[:logger_level])
+      logger.debug "Initialized experiment with options: #{opts}"
+      # setup history
+      if !opts[:transactions].nil?
+        Evoc::HistoryStore.initialize(path: self.opts[:transactions],case_id: self.opts[:case_id], granularity: self.opts[:granularity])
+      end
+    end
+
+    def sample_transactions
+      STDERR.puts "Sampling transactions.."
+      # by default we can sample from the whole history
+      sampling_history = Evoc::HistoryStore.base_history
+      sample = []
+      # filter out transactions larger than X
+      if !self.opts[:maximum_commit_size].nil?
+        STDERR.puts "Only sampling txes smaller than #{self.opts[:maximum_commit_size]}"
+        sampling_history = sampling_history.clone_with_subset(0,sampling_history.size-1,self.opts[:maximum_commit_size])
+      end
+      # only sample transactions that have at least 'minimum_history' previous history
+      if !self.opts[:minimum_history].nil?
+        STDERR.puts "Only sampling txes with at least #{self.opts[:minimum_history]} previous txes (history)"
+        if self.opts[:minimum_history] >= sampling_history.size-1
+          raise ArgumentError, "The history you provided (#{self.opts[:transactions]}), only contains #{sampling_history.size}, not enough to sample with a minimum history set to #{self.opts[:minimum_history]}. Perhaps also #{self.opts[:maximum_commit_size]} must be increased."
+        end
+        sampling_history = sampling_history.clone_with_subset(self.opts[:minimum_history],sampling_history.size-1)
+      end
+      # group the txes by size
+      groups = sampling_history.group_by {|tx| tx.size}
+      # sort the sample_groups option to reduce the need for maintaining control over which txes that have been sampled
+      # i.e., random sampling is done first, then the sampled txes are removed from the sampling
+      tx_sizes_to_sample_from = self.opts[:sample_groups].sort_by(&:to_s)
+      tx_sizes_to_sample_from.each do |group_size|
+        if group_size == '*'
+          # TODO: > 2 should be generalized to > X
+          txes_larger_than_one = sampling_history.select {|tx| tx.size > 2}.map(&:id)
+          sampled_ids = txes_larger_than_one.sample(self.opts[:sample_size])
+          sample << sampled_ids
+          STDERR.puts "Sampled #{sampled_ids.size} txes from the whole history"
+          # remove sampled txes from sampling_history
+          filtered_hist = sampling_history.reject {|tx| sampled_ids.include? tx.id}
+          sampling_history.clear
+          filtered_hist.each {|tx| sampling_history << tx}
+        elsif group_size.to_i
+          # check if there were any txes of this size
+          if group = groups[group_size.to_i]
+            if group.size < self.opts[:sample_size]
+              logger.warn "Only #{group.size} transactions found of size #{group_size}, asked for #{self.opts[:sample_size]}"
+            end
+            sampled_ids = group.sample(self.opts[:sample_size]).map(&:id)
+            sample << sampled_ids
+            STDERR.puts "Sampled #{sampled_ids.size} txes of size #{group_size}"
+          else
+            logger.warn "No transactions found of size #{group_size}, asked for #{self.opts[:sample_size]} (minimum history: #{self.opts[:minimum_history]})"
+          end
+        else
+          raise ArgumentError.new, "Tx size for sampling must either be specified by an Integer or '*' (was #{group_size}:#{group_size.class})"
+        end
+      end
+      sample.flatten.uniq
+    end
+
+
+    ##
+    # Generates a CSV of queries according to the given options
+    #
+    # CSV HEADER:
+    #
+    # tx_id, query
+    #
+    def generate_queries
+      ##
+      # WRITE CSV HEADER
+      CSV {|row| row << %W(tx_id query)}
+
+      ###
+      # Iterate over sampled tx ids
+      CSV.foreach(self.opts[:transaction_ids_path], headers: true) do |row|
+        tx_id = row['tx_id']
+        ##
+        # GET THE TRANSACTION
+        if tx = Evoc::HistoryStore.base_history.get_tx(id: tx_id, id_type: :id)
+          items = tx.items
+          tx_size = items.size
+          ##
+          # SAMPLE QUERIES
+          #
+          # We have 3 different strategies, which may produce the same sizes,
+          # but the same size does not need to be executed several times,
+          # so duplicates are removed
+          specified_sizes = []
+          if !self.opts[:select].nil? then specified_sizes << self.opts[:select].map(&:to_i) end
+          if !self.opts[:reverse_select].nil? then specified_sizes << self.opts[:reverse_select].map {|i| tx_size-i.to_i} end
+          if !self.opts[:percentage].nil? then specified_sizes << self.opts[:percentage].map {|p| (p.to_f/100*tx_size).ceil} end
+          # filter out sizes <= 1
+          specified_sizes.flatten!.select! {|s| s > 0}
+          specified_sizes.uniq!
+
+          random_sizes = []
+          if self.opts[:random_select] then random_sizes << Random.new.rand(1..(tx_size-1)) end
+
+          sampled_queries = []
+          # only specified sizes
+          if random_sizes.empty? & !specified_sizes.empty?
+            sampled_queries = specified_sizes.map {|s| items.sample(s)}
+          # only random sizes
+          elsif !random_sizes.empty? & specified_sizes.empty?
+            sampled_queries = random_sizes.map {|s| items.sample(s)}
+          # random + specified = randomly sample in range defined by specified
+          # ex:
+          # specified = [1,3,10,20]
+          # tx size = 4
+          #
+          # 1. remove X in specified that are larger than or equal to 4
+          # 2. randomly select X in specified = Y
+          # 3. randomly select Y in tx
+          elsif !random_sizes.empty? & !specified_sizes.empty?
+            specified_sizes.select! {|s| (s < tx_size) & (s > 1)} #1.
+            if randomly_sampled_size = specified_sizes.sample #2.
+              sampled_queries = [items.sample(randomly_sampled_size)] #3.
+            end
+          end
+
+          if sampled_queries.empty?
+            logger.warn "Unable to generate query from tx: #{items}, with params #{self.opts}"
+          end
+
+          ##
+          # WRITE CSV
+          sampled_queries.each do |query|
+            if query.size == tx_size
+              logger.debug "The size of the sampled query was equal to the size of the transaction, skipping.. Tx ID: #{tx_id}. Query size: #{query.size}"
+              next
+            end
+            CSV {|row| row << [tx_id,query.join(',')]}
+          end
+        else
+          raise ArgumentError, "The tx with id '#{tx_id}' was not found in the history: #{self.opts[:transactions]}, wrong file?"
+        end
+      end
+
+      #TODO possibly move this to execution
+      #############################################
+      # Filter the generated queries if requested #
+      #############################################
+      #
+      #
+      # logger.debug "Number of queries before filtering: #{query_store.size}"
+      #
+      # if self.opts[:filter_expected_outcome]
+      # STDERR.puts "Filtering expected outcome.."
+      # init_size = query_store.size
+      # query_store.filter_expected_outcome!
+      # STDERR.puts "Had to remove #{init_size - query_store.size} queries as a result of the expected outcome now being empty"
+      # end
+      # if query_store.empty?
+      # STDERR.puts "WARNING: Returning 0 queries (maybe increase sample size?)"
+      # end
+    end
+
+    ##
+    # Execute a set of scenarios
+    #
+    # @return: json lines stream to stdout (http://jsonlines.org/)
+    def execute_scenarios
+
+      ######
+      # Setup factors
+      #####
+
+      # Factor: model size aka model/learning/history size
+      factor_model_size = self.opts[:model_size].nil? ? nil : self.opts[:model_size].map {|s| [ 'model_size',s ]}
+      # Factor: Max size aka filtering constraint on history
+      factor_max_size = self.opts[:max_size].nil? ? nil : self.opts[:max_size].map {|s| [ 'max_size',s ]}
+      # Factor: Model age aka number of commits between query and last tx in history
+      factor_model_age = self.opts[:model_age].nil? ? nil : self.opts[:model_age].map {|s| [ 'model_age',s ]}
+      # Factor: Algorithm
+      factor_algorithms = self.opts[:algorithms].nil? ? nil : self.opts[:algorithms].map {|a| ['algorithm',a]}
+      # Factor: Measures
+      factor_measures = self.opts[:measures].map {|c| ['measures',c]}
+      # Factor: Aggregator
+      # if aggregation is requested, we also assume that no aggregation wants to be tested
+      # the non-aggregation rows are specified with 'aggregator' == nil
+      factor_aggregators = self.opts[:aggregators].nil? ? nil : (self.opts[:aggregators]+[nil]).map {|a| ['aggregator',a]}
+      # deprecated factor, allways set to 1 for backwards compatibility
+      # factor_permutation = self.opts[:permutation].nil? ? nil : (1..self.opts[:permutation]).to_a.map {|p| [ 'permutation',p ]}
+      factor_permutation = [[ 'permutation',1 ]]
+
+      ####
+      # Iterate over the queries provided and execute each query in each scenario
+      #
+      # queries CSV header:
+      # tx_id,tx_index,tx,query,query_size,expected_outcome,expected_outcome_size
+
+      # Count the number of lines so we can properly format the output json
+      num_lines = File.read(self.opts[:queries]).each_line.count-1
+      current_line = 1
+
+      factors = [factor_model_size,factor_max_size,factor_model_age,factor_algorithms,factor_measures,factor_permutation,factor_aggregators].compact
+      num_of_scenarios = factors.inject(1) {|product,f| product * f.size}
+      CSV.foreach(self.opts[:queries], headers: true) do |query|
+        # abort if the failsafe file is present
+        if !self.opts[:fail_safe].nil?
+          if File.exists?(self.opts[:fail_safe])
+            $stderr.puts "\nFail safe detected, exiting.."
+            break
+          end
+        end
+        current_scenario = 1
+        # - compact removes nil values (not used factors)
+        # - the splat operator '*' turns the array into parameters for #product
+        # - the block form of #product makes it lazy (i.e., the whole cartesian product isn't generated at once)
+        factors.first.product(*factors[1..-1]).each do |scenario|
+          # Print progress to stderr
+          STDERR.print "(#{self.opts[:case_id]}) Executing scenario #{current_scenario} of #{num_of_scenarios} on query #{current_line} of #{num_lines} \r"
+
+          params = query.to_h.merge(scenario.to_h)
+          params[:case_id] = self.opts[:case_id]
+          params[:granularity] = self.opts[:granularity]
+          # initialize scenario
+          q = Evoc::Scenario.new(params)
+          $stdout.puts q.call(evaluators: self.opts[:evaluators]).to_json
+          current_scenario += 1
+        end
+        current_line += 1
+      end
+      STDERR.puts "\n(#{self.opts[:case_id]}) DONE"
+    end
+  end
+end
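The class above is normally driven through the gem's CLI (bin/evoc), but a minimal sketch of calling it directly might look like the following. The option keys mirror the self.opts[...] accesses in the code; the file name, case id, and sampling parameters are placeholder assumptions, not values shipped with the package.

    require 'evoc'

    # Placeholder options; keys follow the reads in Evoc::Experiment.
    opts = {
      transactions: 'transactions.json',  # assumed path to a change history
      case_id: 'example_case',
      granularity: 'mixed',
      logger_level: :error,
      sample_size: 50,                    # transactions to sample per group
      sample_groups: ['2', '3', '*'],     # sizes to sample; '*' samples across sizes
      minimum_history: 100,
      maximum_commit_size: 30
    }

    experiment = Evoc::Experiment.new(opts)
    sampled_tx_ids = experiment.sample_transactions
    puts sampled_tx_ids.size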
data/lib/evoc/hash.rb
ADDED
@@ -0,0 +1,56 @@
+class Hash
+  include Logging
+
+  def convert_values(except: [],converter: Rational, convert_empty: false)
+    logger.debug "Converter: #{converter}, Hash: #{self}"
+    if except.is_a?(Array)
+      Hash[self.map {|k, v|
+        if except.include?(k)
+          [k,v]
+        else
+          # don't convert empty fields to avoid side-effects
+          # "".to_i is 0 for example
+          if convert_empty
+            [k, converter.method(converter.to_s).call(v)]
+          else
+            if v.to_s.empty?
+              [k,v]
+            else
+              [k, converter.method(converter.to_s).call(v)]
+            end
+          end
+        end}]
+    else
+      raise ArgumentError.new, "Specify an array of keys whose values you don't want to convert"
+    end
+  end
+
+  ##
+  # stolen from rails
+  # http://api.rubyonrails.org/classes/Hash.html
+  ##
+  def symbolize_keys
+    transform_keys{ |key| key.to_sym rescue key }
+  end
+
+  def symbolize_keys!
+    transform_keys!{ |key| key.to_sym rescue key }
+  end
+
+  def transform_keys
+    return enum_for(:transform_keys) unless block_given?
+    result = self.class.new
+    each_key do |key|
+      result[yield(key)] = self[key]
+    end
+    result
+  end
+
+  def transform_keys!
+    return enum_for(:transform_keys!) unless block_given?
+    keys.each do |key|
+      self[yield(key)] = delete(key)
+    end
+    self
+  end
+end
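A small usage sketch of the core extension above, assuming `require 'evoc'` loads it; the hash contents are invented for illustration. Empty strings are left untouched because convert_empty defaults to false.

    require 'evoc'

    row = { 'tx_id' => 'a1b2c3', 'query_size' => '3', 'note' => '' }

    # Convert every value except 'tx_id' with Kernel#Integer;
    # the empty 'note' value is kept as-is (convert_empty: false).
    converted = row.convert_values(except: ['tx_id'], converter: Integer)
    # => {"tx_id"=>"a1b2c3", "query_size"=>3, "note"=>""}

    converted.symbolize_keys
    # => {:tx_id=>"a1b2c3", :query_size=>3, :note=>""}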
data/lib/evoc/history_store.rb
ADDED
@@ -0,0 +1,53 @@
+module Evoc
+  class HistoryStore
+    extend Logging
+
+    # create accessors for class level instance variables
+    class << self
+      attr_accessor :tag, :history, :svd
+    end
+
+    def self.initialize(path:,case_id: 'CASEID_NOT_PROVIDED', granularity: 'mixed')
+      self.base_history = Evoc::TxStore.new(path: path,case_id: case_id, granularity: granularity)
+      self.tag = gen_tag(0,self.base_history.size,"all")
+      return self
+    end
+
+    def self.get_history(start_index,end_index,max_size=nil)
+      tag = gen_tag(start_index,end_index,max_size)
+      if self.tag.nil?
+        raise Evoc::Exceptions::NotInitialized.new, "The history store must be initialized with a base history before fetching subhistories"
+      elsif self.tag != tag
+        # new history
+        self.tag = tag
+        # create new subset
+        self.history = self.base_history.clone_with_subset(start_index,end_index,max_size)
+        logger.info "Caching new history | start_index: #{start_index}, end_index: #{end_index}, max_size: #{max_size}, actual filtered size: #{self.history.size}"
+        # make the history unmutable
+        self.history.freeze
+      end
+      self.history
+    end
+
+    def self.base_history=tx_store
+      @@base_history = tx_store.freeze
+    end
+
+    def self.base_history
+      @@base_history
+    end
+
+    def self.get_svd(start_index,end_index,max_size=nil)
+      tag = self.gen_tag(start_index,end_index,max_size)
+      if self.svd.nil? || (self.tag != tag)
+        self.svd = Evoc::SVD.new(get_history(start_index,end_index,max_size))
+      end
+      self.svd
+    end
+
+    private
+    def self.gen_tag(start_index,end_index,max_size)
+      start_index.to_s+end_index.to_s+max_size.to_s
+    end
+  end # HistoryStore
+end # Evoc
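As a rough sketch of the caching behaviour above (the path and indices are placeholders, and the history file format is whatever Evoc::TxStore accepts):

    require 'evoc'

    Evoc::HistoryStore.initialize(path: 'transactions.json',
                                  case_id: 'example_case',
                                  granularity: 'mixed')

    # Sub-histories are cached under a (start_index, end_index, max_size) tag
    # and frozen; asking for the same slice twice reuses the cached TxStore.
    h1 = Evoc::HistoryStore.get_history(0, 99, 30)
    h2 = Evoc::HistoryStore.get_history(0, 99, 30)
    h1.equal?(h2)   # => true, same frozen object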
data/lib/evoc/hyper_rule.rb
ADDED
@@ -0,0 +1,53 @@
+module Evoc
+  class HyperRule < Rule
+    attr_accessor :lhs, :rhs, :hyper_coefficient, :hyper_confidence
+
+    def initialize(rules,aggregator,measures)
+      if measures.nil? || measures.empty?
+        raise ArgumentError, "At least one measure must be provided when defining a new hyper rule"
+      end
+      self.lhs = rules.map(&:lhs).array_union
+      self.rhs = rules.map(&:rhs).array_union
+      self.tx_store = rules.first.tx_store
+      self.hyper_coefficient = rules.size
+      # aggregate measures
+      measures.each do |m|
+        logger.debug "Aggregating #{self.hyper_coefficient} rules on #{m}"
+        # check if its the hyper confidence
+        if m == 'm_hyper_confidence'
+          antecedent = self.tx_store.transactions_of_list(self.lhs)
+          consequent = self.tx_store.transactions_of_list(self.rhs)
+          numerator = (antecedent & consequent).size
+          denominator = antecedent.size
+          hyper_confidence = numerator/denominator
+          self.set_measure(m,hyper_confidence,hyper_measure: true)
+        else
+          begin
+            # get the 'm' measure of each rule
+            m_measures = rules.map {|rule| rule.get_measure(m)}
+            # remove undefined measures
+            m_measures.reject! {|a| a.value.nil?}
+            # group the measures into negative and positive correlations
+            pos_correlation = m_measures.group_by {|a| a.value > Evoc::Rule.get_mid(m)}
+            positive_correlation = pos_correlation[true].nil? ? 0 : Evoc::InterestingnessMeasureAggregator.new(m,pos_correlation[true]).method(aggregator).call.value
+            # add the agggregated measure
+            self.set_measure(m,positive_correlation,hyper_measure: true)
+
+            # add the hyper coefficient as a measure for aggregators called with '.._hc'
+            if !(aggregator =~ /_hc\z/).nil?
+              self.set_measure('m_hyper_coefficient',hyper_coefficient,hyper_measure: true)
+            end
+
+          rescue Evoc::Exceptions::InterestingnessMeasure::NonFinite
+            logger.debug "Could not aggregate #{m} using #{aggregator}, aggregation produced a non-finite value, probably a range problem, setting measure value to nil"
+            self.set_measure(m,nil, hyper_measure: true)
+          end
+        end
+      end
+    end
+
+    def name
+      self.lhs.join(',') + " => " + self.rhs.join(',')
+    end
+  end
+end
data/lib/evoc/interestingness_measure.rb
ADDED
@@ -0,0 +1,77 @@
+module Evoc
+  class InterestingnessMeasure
+    include Comparable, Logging
+    attr_accessor :type, :value, :min, :max, :mid, :hyper_measure
+
+    def initialize(type:,min:,mid:,max:,value: nil,hyper_measure: false)
+      @type = type
+      self.min = min
+      self.max = max
+      self.mid = mid
+      self.hyper_measure = hyper_measure
+      if block_given?
+        self.value = yield
+      else
+        self.value = value
+      end
+    end
+
+    #def value
+    # @value.nil? ? @value : @value
+    #end
+
+    def min=m
+      @min = m
+    end
+
+    def max=m
+      @max = m
+    end
+
+    def mid=m
+      @mid = m
+    end
+
+    def value=v
+      if v.nil?
+        @value = nil
+      elsif v.to_f.nan?
+        logger.warn "#{self.type}: #{v} was NAN"
+        @value = nil
+      else
+        @value = v
+        if !self.hyper_measure
+          if !v.between?(self.min,self.max)
+            logger.warn "#{self.type}: #{v} is not in the domain of [#{self.min},#{self.max}]"
+          end
+        end
+      end
+    end
+
+    def <=> other
+      return nil unless constr_equal_type other
+      self.value <=> other.value
+    end
+
+    def -@
+      -self.value
+    end
+
+    def finite?
+      self.value.finite?
+    end
+
+    def to_s
+      self.value.to_s
+    end
+
+    private
+    def constr_equal_type other
+      (self.class == other.class ? true : ( raise ArgumentError, "self: #{self.type}: #{self.class} was of different class than other: #{other.class}" ) ) &
+      (self.max == other.max ? true : ( raise ArgumentError, "self: #{self.type}: #{self.max} had a different max than other: #{other.max}" ) ) &
+      (self.mid == other.mid ? true : ( raise ArgumentError, "self: #{self.type}: #{self.mid} had a different mid than other: #{other.mid}" ) ) &
+      (self.min == other.min ? true : ( raise ArgumentError, "self: #{self.type}: #{self.min} had a different min than other: #{other.min}" ) ) &
+      (self.type == other.type ? true : ( raise ArgumentError, "self: #{self.type}: #{self.type}had a different type than other: #{other.type}" ) )
+    end
+  end
+end
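A hedged example of constructing and comparing measure objects directly; the measure name and the [-1, 0, 1] bounds below are illustrative choices, not values taken from the gem's measure definitions.

    require 'evoc'

    bounds = { type: :m_example, min: -1, mid: 0, max: 1 }

    m1 = Evoc::InterestingnessMeasure.new(value: 0.8, **bounds)
    m2 = Evoc::InterestingnessMeasure.new(**bounds) { 0.3 }  # value set via block

    m1 > m2      # => true; Comparable is driven by <=> on #value
    neg = -m1    # => -0.8, via the unary minus defined above
    m1.finite?   # => true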
data/lib/evoc/interestingness_measure_aggregator.rb
ADDED
@@ -0,0 +1,147 @@
+module Evoc
+  class InterestingnessMeasureAggregator
+    attr_accessor :type,:min,:mid,:max,:values,:size
+
+    def initialize(type,values)
+      self.type = type
+      self.min = Evoc::InterestingnessMeasures.get_min(type)
+      self.mid = Evoc::InterestingnessMeasures.get_mid(type)
+      self.max = Evoc::InterestingnessMeasures.get_max(type)
+      self.values = values
+      self.size = values.size
+    end
+
+    ###
+    # Our own aggregation functions
+
+    def hcg
+      agr = normalize_measures.inject {|tot,i|
+        direction = i > 0 ? self.max-self.mid : self.min-self.mid
+        coefficient = direction.to_f.finite? ? (direction-tot)/direction : 1
+        tot + coefficient*i
+      } + self.mid
+      Evoc::InterestingnessMeasure.new(type: self.type, min: self.min, mid: self.mid, max: self.max, value: agr)
+    end
+
+    def hcg_hc
+      self.hcg
+    end
+
+    ##
+    # Aggregation functions borrowed from IR
+
+    ##
+    # cumulative gain aka sum
+    def cg
+      aggregated = normalize_measures.inject(:+) + self.mid
+      Evoc::InterestingnessMeasure.new(type: self.type, min: self.min, mid: self.mid, max: self.max, value: aggregated)
+    end
+
+    # discounted CG
+    def dcg
+      agr = normalize_measures.first + normalize_measures[1..-1].each_with_index.inject(0) {|sum,(element,index)| sum + element/Math.log2(index+2)}
+      agr = agr + self.mid
+      Evoc::InterestingnessMeasure.new(type: self.type, min: self.min, mid: self.mid, max: self.max, value: agr)
+    end
+
+    # discounted CG2
+    def dcg2
+      agr = normalize_measures.first + normalize_measures[1..-1].each_with_index.inject(0) {|sum,(element,index)|
+        sum + (element >= 0 ? (2**element-1)/Math.log2(index+2) : -(2**element.abs-1)/Math.log2(index+2))
+      }
+      agr = agr + self.mid
+      Evoc::InterestingnessMeasure.new(type: self.type, min: self.min, mid: self.mid, max: self.max, value: agr)
+    end
+
+    # def discounted_cg3
+    # agr = normalize_measures.each_with_index.inject(0) {|sum,(element,index)|
+    # sum + element >= 0 ? (2**element-1)/Math.log2(index+2) : -(2**element.abs-1)/Math.log2(index+2)
+    # }
+    # agr = agr + self.mid
+    # Evoc::InterestingnessMeasure.new(type: self.type, min: self.min, mid: self.mid, max: self.max, value: agr)
+    # end
+    #
+    # def discounted_cg4
+    # agr = normalize_measures.each_with_index.inject(0) {|sum,(element,index)|
+    # sum + (2**element)/Math.log2(index+2)
+    # }
+    # agr = agr + self.mid
+    # Evoc::InterestingnessMeasure.new(type: self.type, min: self.min, mid: self.mid, max: self.max, value: agr)
+    # end
+    #
+    # def discounted_cg5
+    # agr = normalize_measures.each_with_index.inject(0) {|sum,(element,index)|
+    # sum + element == 0 ? 0 : (2**element)/Math.log2(index+2)
+    # }
+    # agr = agr + self.mid
+    # Evoc::InterestingnessMeasure.new(type: self.type, min: self.min, mid: self.mid, max: self.max, value: agr)
+    # end
+    #
+    # def discounted_rank_cg
+    # agr = normalize_measures.each_with_index.inject(0) {|sum,(element,index)|
+    # sum + element/(index+1)
+    # }
+    # agr = agr + self.mid
+    # Evoc::InterestingnessMeasure.new(type: self.type, min: self.min, mid: self.mid, max: self.max, value: agr)
+    # end
+    #
+
+    def normalize_measures
+      self.values.map {|m| m - self.mid}
+    end
+
+    ##
+    # values=
+    #
+    # set and sort by abs value
+    def values=(measures)
+      if measures.all? {|m| m.is_a?(Evoc::InterestingnessMeasure)}
+        @values = measures.map(&:value)
+      else
+        @values = measures
+      end
+      @values.map!(&:to_f)
+      @values.sort! {|x,y| y.abs<=>x.abs}
+    end
+
+    private
+    def constr_equal_type other
+      (self.class == other.class ? true : ( raise ArgumentError, "self: #{self.type}: #{self.class} was of different class than other: #{other.class}" ) ) &
+      (self.max == other.max ? true : ( raise ArgumentError, "self: #{self.type}: #{self.max} had a different max than other: #{other.max}" ) ) &
+      (self.mid == other.mid ? true : ( raise ArgumentError, "self: #{self.type}: #{self.mid} had a different mid than other: #{other.mid}" ) ) &
+      (self.min == other.min ? true : ( raise ArgumentError, "self: #{self.type}: #{self.min} had a different min than other: #{other.min}" ) ) &
+      (self.type == other.type ? true : ( raise ArgumentError, "self: #{self.type}: #{self.type}had a different type than other: #{other.type}" ) )
+      # ((self.value.round(2) >= self.mid) == (other.value.round(2) >= self.mid) ? true : ( raise ArgumentError, "#{self.type}: self: #{self.value} was on another side of the midpoint than other: #{other.value}, midpoint was #{self.mid}" ))
+    end
+
+    def constr_value_in_range(agr_val)
+      if !agr_val.between?(self.min,self.max)
+        #raise Evoc::Exceptions::AggregationError, "#{self.type}: #{agr_val} was not in range: [#{self.min},#{self.max}]"
+        logger.warn "#{self.type}: #{agr_val} was not in range: [#{self.min},#{self.max}]"
+      end
+    end
+
+    def constr_self_value_equal_mid
+      satisfied = self.value == self.mid
+      if satisfied
+        logger.warn "self value (#{self.value}) was equal to mid value (#{self.mid}) when aggregating #{self.type} measure, just returning other"
+      end
+      satisfied
+    end
+
+    def constr_other_value_equal_mid(other)
+      satisfied = other.value == self.mid
+      if satisfied
+        logger.warn "other value (#{other.value}) was equal to mid value (#{self.mid}) when aggregating #{self.type} measure, just returning self"
+      end
+      satisfied
+    end
+
+    def constr_other_absvalue_smaller_than_self_absvalue(self_value,other_value)
+      if other_value.abs > self_value.abs
+        #raise ArgumentError.new, "The absolute value of the right hand argument must always be smaller or equal to the absolute value of the left hand side. lhs was #{self_value}, rhs was #{other_value} (possibly normalized around 0)"
+        logger.warn "The absolute value of the right hand argument must always be smaller or equal to the absolute value of the left hand side. lhs was #{self_value}, rhs was #{other_value} (possibly normalized around 0)"
+      end
+    end
+  end
+end
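For context, a sketch of aggregating a list of raw measure values with the class above. Note that the constructor looks up min/mid/max through Evoc::InterestingnessMeasures, so the measure name used here ('m_confidence') is only a guess at one of the names registered in interestingness_measures.rb and may need to be replaced with a measure the gem actually defines.

    require 'evoc'

    # Values may be raw numerics or InterestingnessMeasure objects; they are
    # normalized around the measure's mid point and sorted by absolute value.
    agg = Evoc::InterestingnessMeasureAggregator.new('m_confidence', [0.9, 0.4, 0.7])

    agg.cg.value    # cumulative gain: sum of normalized values, shifted back by mid
    agg.dcg.value   # log2-discounted cumulative gain
    agg.hcg.value   # the gem's own "hyper cumulative gain" aggregation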