evoc 3.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/Makefile +4 -0
- data/README.md +61 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/evoc +3 -0
- data/bin/setup +7 -0
- data/evoc.gemspec +30 -0
- data/lib/evoc/algorithm.rb +147 -0
- data/lib/evoc/algorithms/top_k.rb +86 -0
- data/lib/evoc/analyze.rb +395 -0
- data/lib/evoc/array.rb +43 -0
- data/lib/evoc/evaluate.rb +109 -0
- data/lib/evoc/exceptions/aggregation_error.rb +6 -0
- data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
- data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
- data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_result.rb +6 -0
- data/lib/evoc/exceptions/non_finite.rb +8 -0
- data/lib/evoc/exceptions/non_numeric.rb +8 -0
- data/lib/evoc/exceptions/not_a_query.rb +6 -0
- data/lib/evoc/exceptions/not_a_result.rb +6 -0
- data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
- data/lib/evoc/exceptions/not_initialized.rb +6 -0
- data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
- data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
- data/lib/evoc/experiment.rb +239 -0
- data/lib/evoc/hash.rb +56 -0
- data/lib/evoc/history_store.rb +53 -0
- data/lib/evoc/hyper_rule.rb +53 -0
- data/lib/evoc/interestingness_measure.rb +77 -0
- data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
- data/lib/evoc/interestingness_measures.rb +882 -0
- data/lib/evoc/logger.rb +34 -0
- data/lib/evoc/memory_profiler.rb +43 -0
- data/lib/evoc/recommendation_cache.rb +152 -0
- data/lib/evoc/rule.rb +32 -0
- data/lib/evoc/rule_store.rb +340 -0
- data/lib/evoc/scenario.rb +303 -0
- data/lib/evoc/svd.rb +124 -0
- data/lib/evoc/tx.rb +34 -0
- data/lib/evoc/tx_store.rb +379 -0
- data/lib/evoc/version.rb +3 -0
- data/lib/evoc.rb +4 -0
- data/lib/evoc_cli/analyze.rb +198 -0
- data/lib/evoc_cli/cli_helper.rb +1 -0
- data/lib/evoc_cli/experiment.rb +78 -0
- data/lib/evoc_cli/info.rb +22 -0
- data/lib/evoc_cli/main.rb +29 -0
- data/lib/evoc_cli/util.rb +36 -0
- data/lib/evoc_helper.rb +40 -0
- data/mem_profiler/Gemfile.lock +39 -0
- data/mem_profiler/README.md +126 -0
- data/mem_profiler/createdb.rb +4 -0
- data/mem_profiler/db.rb +82 -0
- data/mem_profiler/gemfile +6 -0
- data/mem_profiler/gencsv.rb +64 -0
- data/mem_profiler/genimport.sh +8 -0
- data/mem_profiler/graph.rb +91 -0
- metadata +251 -0
data/lib/evoc/logger.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Enable logging in classes through 'include Logging' (instance-level logger)
# or 'extend Logging' (class-level logger).
require 'logger'

module Logging
  ##
  # @return [Logger] the logger shared by every receiver of the same class.
  #   When Logging is mixed in with `extend`, the receiver is itself a
  #   class/module, so its own name is used as the program name instead of
  #   the uninformative "Class"/"Module".
  def logger
    @logger ||= Logging.logger_for((is_a?(Module) && name) || self.class.name)
  end

  # Use a hash class-ivar to cache a unique Logger per class:
  @loggers = {}
  # Level (lowercase name) applied to newly created loggers
  @logger_level = 'info'

  class << self
    ##
    # @param [String] classname the name to look up
    # @return [Logger] the cached logger for classname, created on first use
    def logger_for(classname)
      @loggers[classname] ||= configure_logger_for(classname)
    end

    ##
    # Builds a logger writing to 'evoc.log' (rotated daily), using the
    # currently configured level.
    #
    # @param [String] classname used as the logger's progname
    # @return [Logger]
    def configure_logger_for(classname)
      logger = Logger.new('evoc.log', 'daily')
      logger.progname = classname
      logger.level = Logger.const_get(@logger_level.upcase)
      logger
    end

    ##
    # Sets the level of all existing loggers and of loggers created later.
    # An unknown level leaves the current configuration untouched.
    #
    # @param [String, Symbol] level one of debug, info, warn, error, fatal (case-insensitive)
    # @return [String, nil] the level that was set, nil if level was rejected
    def set_level(level)
      possible_levels = %w(debug info warn error fatal)
      requested = level.to_s.downcase
      if possible_levels.include?(requested)
        STDERR.puts "Logging level has been set to '#{requested}' for output to evoc.log"
        severity = Logger.const_get(requested.upcase)
        # @loggers maps name => Logger, so only the values are loggers
        @loggers.each_value { |l| l.level = severity }
        @logger_level = requested
      else
        STDERR.puts "Unable to set logger level to #{level}, possible values are #{possible_levels}. Keeping '#{@logger_level}'."
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'objspace'
|
2
|
+
|
3
|
+
module Kernel
  ##
  # Runs the given block repeatedly on a background thread. The block is
  # called immediately, and after each call the thread sleeps until the next
  # wall-clock multiple of +sec+ seconds.
  #
  # @param [Numeric] sec the tick interval in seconds
  # @return [Thread] the thread running the loop
  def tick_every(sec, &pr)
    Thread.new do
      loop do
        pr.call
        now = Time.now.to_f
        elapsed_in_interval = now.modulo(sec.to_f)
        sleep(sec - elapsed_in_interval)
      end
    end
  end
end
|
13
|
+
|
14
|
+
##
# Periodically dumps the Ruby heap to 'mem_dumps/' on a background thread.
module MemoryProfiler

  # The thread doing the periodic dumps, nil until .start has been called
  @thread = nil

  ##
  # Starts allocation tracing and a background thread that, on every tick,
  # runs the GC, writes a heap dump to 'mem_dumps/<tag>-<timestamp>.dump'
  # and appends "<dump path>,<timestamp>" to 'mem_dumps/<tag>-index.txt'.
  # The index file is truncated when profiling starts.
  #
  # @param [String] tag tag to add to dumped files
  # @param [Integer] n dump every n seconds
  # @return [Thread] the dumping thread
  def self.start(tag,n)
    dump_dir = 'mem_dumps'
    index = "#{dump_dir}/#{tag}-index.txt"
    ObjectSpace.trace_object_allocations_start
    Dir.mkdir(dump_dir) unless Dir.exist?(dump_dir)
    # create/truncate the index; block form closes the handle right away
    File.open(index, "w") {}
    @thread = tick_every(n) do
      GC.start
      stamp = Time.now.strftime('%Y-%m-%dT%H:%M:%S')
      dump_path = "#{dump_dir}/#{tag}-#{stamp}.dump"
      # block form so the dump file is flushed and closed after each dump
      File.open(dump_path, "w") { |io| ObjectSpace.dump_all(output: io) }
      File.open(index, 'a') { |f| f.puts "#{dump_path},#{stamp}" }
    end
  end

  ##
  # Stops the dumping thread. Does nothing if profiling was never started.
  #
  # @return [Thread, nil] the killed thread, or nil when none was running
  def self.stop
    @thread.kill if @thread
  end

end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
module Evoc
  ##
  # Class-level cache holding the most recently generated recommendation,
  # together with its timings and evaluation results.
  class RecommendationCache
    extend Logging

    # Accessors for the class-level instance variables:
    #
    # tag:                     hash of [algorithm, query, model_start, model_end, max_size]
    #                          identifying the cached base recommendation
    # base_recommendation:     the rules returned by Evoc::Algorithm.execute
    # last_recommendation:     the aggregated recommendation, or the base
    #                          recommendation when no aggregator was requested
    # time_rulegeneration:     seconds spent generating the base recommendation
    # time_measurecalculation: seconds spent calculating measures
    # time_aggregation:        seconds spent aggregating (nil when not aggregated)
    # time_evaluation:         seconds spent evaluating (nil when not evaluated)
    # filtered_model_size:     size of the tx_store the recommendation was generated from
    # evaluation:              Hash of evaluator => evaluation result
    class << self
      attr_accessor :tag, :base_recommendation, :last_recommendation, :time_rulegeneration, :time_measurecalculation, :time_aggregation, :time_evaluation, :filtered_model_size, :evaluation
    end

    ##
    # @return [Boolean] true if the cached base recommendation was generated
    #   with exactly these parameters
    def self.recommendation_cached?(algorithm:,
                                    query:,
                                    model_start:,
                                    model_end:,
                                    max_size: nil)
      self.tag == tag_for(algorithm, query, model_start, model_end, max_size)
    end

    ##
    # Returns the recommendation for the given parameters. The base
    # recommendation is only regenerated when the parameters differ from the
    # cached ones; measures and aggregation are (re)applied on every call.
    #
    # @param aggregator [#to_sym, nil] the aggregator to apply, nil for no aggregation
    # @param measures [Array] the measures to calculate on the rules
    # @return the aggregated recommendation, or the base recommendation when
    #   aggregator is nil
    def self.get_recommendation(algorithm:,
                                query:,
                                model_start:,
                                model_end:,
                                max_size: nil,
                                aggregator: nil,
                                measures: [])
      # check if a new base recommendation needs to be generated
      tag = tag_for(algorithm, query, model_start, model_end, max_size)
      if self.tag != tag
        # Invalidate first and only store the new tag once generation has
        # succeeded, so that a failure cannot leave a tag that points at a
        # stale recommendation.
        self.tag = nil
        # clear out any evaluation done
        self.evaluation = Hash.new
        self.time_evaluation = nil
        # new recommendation
        logger.debug "Caching new recommendation: algorithm: #{algorithm}, query: #{query}, model_start/end: #{model_start} - #{model_end}, maxsize: #{max_size}"
        tx_store = Evoc::HistoryStore.get_history(model_start,
                                                  model_end,
                                                  max_size)
        self.filtered_model_size = tx_store.size

        self.time_rulegeneration = time_in_seconds do
          self.base_recommendation = Evoc::Algorithm.execute(tx_store: tx_store,
                                                             query: query,
                                                             algorithm: algorithm)
          self.last_recommendation = self.base_recommendation
        end
        self.tag = tag
      end

      # calculate measures on rules
      self.time_measurecalculation = time_in_seconds do
        self.base_recommendation.calculate_measures(measures)
      end

      # perform aggregation
      if aggregator.nil?
        self.last_recommendation = self.base_recommendation
        # do not report the aggregation time of an earlier call
        self.time_aggregation = nil
      else
        self.time_aggregation = time_in_seconds do
          self.last_recommendation = self.base_recommendation.aggregate_by(aggregator: aggregator.to_sym,measures: measures) {|r| r.rhs}
        end
      end
      return self.last_recommendation
    end

    ##
    # Evaluate the currently cached recommendation. The results are stored in
    # +evaluation+, keyed by evaluator. Prints a notice to STDERR when there
    # is no cached recommendation.
    #
    # @param [Array<String>] evaluators the evaluators to apply
    # @param [Array<String>] expected_outcome the expected outcome to use in evaluations
    # @param [Array<String>] measure_combination the list of measures to use when sorting a recommendation before evaluating
    #
    # @return [Numeric, nil] the evaluation time in seconds, nil if there was
    #   nothing to evaluate
    def self.evaluate(evaluators: ,expected_outcome:,measure_combination: )
      if self.last_recommendation.nil?
        STDERR.puts "TAG = #{self.tag}: No recommendation to evaluate"
        return nil
      end
      self.evaluation ||= Hash.new
      self.time_evaluation = time_in_seconds do
        evaluators.each do |evaluator|
          self.evaluation[evaluator] = self.last_recommendation.evaluate_with(evaluator: evaluator,expected_outcome: expected_outcome,measure_combination: measure_combination)
        end
      end
    end

    ##
    # Hash representation of the cached recommendation. Requires that a
    # recommendation has been generated.
    #
    # format:
    #   {
    #     recommendation_tag:      ..,
    #     time_rulegeneration:     ..,
    #     time_measurecalculation: ..,
    #     time_aggregation:        ..,
    #     time_evaluation:         ..,
    #     filtered_model_size:     ..,
    #     number_of_baserules:     ..,
    #     number_of_rules:         ..,
    #     aggregator:              ..,
    #     number_of_hyper_rules:   ..,
    #     mean_hyper_coefficient:  .. (nil when there are no rules),
    #     largest_antecedent:      ..,
    #     <evaluator>:             value, (one entry per evaluator)
    #     unique_consequents:      ..,    (only present after an evaluation)
    #     rules: [
    #       {
    #         lhs: 'comma separated lhs',
    #         rhs: 'comma separated rhs',
    #         measures: {
    #           measure_1: value,
    #           measure_n: value
    #         }
    #       },
    #       ..next rule..
    #     ]
    #   }
    #
    # measures: the interestingness measures that you want to output in the hash
    def self.to_h(measures: Evoc::Rule.measures)
      last = self.last_recommendation
      recommendation_hash = Hash.new
      recommendation_hash[:recommendation_tag] = self.tag
      recommendation_hash[:time_rulegeneration] = self.time_rulegeneration
      recommendation_hash[:time_measurecalculation] = self.time_measurecalculation
      recommendation_hash[:time_aggregation] = self.time_aggregation
      recommendation_hash[:time_evaluation] = self.time_evaluation
      recommendation_hash[:filtered_model_size] = self.filtered_model_size
      recommendation_hash[:number_of_baserules] = self.base_recommendation.size
      recommendation_hash[:number_of_rules] = last.size
      recommendation_hash[:aggregator] = last.aggregator
      recommendation_hash[:number_of_hyper_rules] = last.number_of_hyper_rules
      # guard against 0.0 / 0 (NaN) when the recommendation has no rules
      recommendation_hash[:mean_hyper_coefficient] =
        if last.size.zero?
          nil
        else
          last.inject(0.0) { |sum, r| sum + r.get_measure('m_hyper_coefficient').value } / last.size
        end
      recommendation_hash[:largest_antecedent] = last.largest_antecedent
      if !self.evaluation.nil?
        self.evaluation.each do |evaluator,value|
          recommendation_hash[evaluator.to_sym] = value[:value]
          recommendation_hash[:unique_consequents] = value[:unique_consequents]
        end
      end
      recommendation_hash[:rules] = last.map do |rule|
        rule_hash = Hash.new
        rule_hash[:lhs] = rule.lhs.is_a?(String) ? rule.lhs : rule.lhs.join(',')
        rule_hash[:rhs] = rule.rhs.is_a?(String) ? rule.rhs : rule.rhs.join(',')
        rule_hash[:measures] = Hash.new
        measures.each do |m|
          # only report measures that have already been calculated
          if rule.measure_instantiated?(m)
            rule_hash[:measures][m] = rule.get_measure(m).value
          end
        end
        rule_hash
      end
      return recommendation_hash
    end

    # The cache key for a set of recommendation parameters
    def self.tag_for(algorithm, query, model_start, model_end, max_size)
      [algorithm,query,model_start,model_end,max_size].hash
    end
    private_class_method :tag_for

    # Runs the block and returns how long it took, in seconds
    def self.time_in_seconds
      t1 = Time.new
      yield
      t2 = Time.new
      TimeDifference.between(t1,t2).in_seconds.round(8)
    end
    private_class_method :time_in_seconds
  end
end
|
data/lib/evoc/rule.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module Evoc
  ##
  # An association rule 'lhs -> rhs', optionally carrying interestingness
  # measures and a reference to the tx_store it was derived from.
  class Rule
    include InterestingnessMeasures, Logging, Comparable
    attr_accessor :lhs, :rhs, :name, :tx_store

    ##
    # @param lhs a single item or an Array of items (arrays are stored sorted)
    # @param rhs a single item or an Array of items (arrays are stored sorted)
    # @param tx_store the transaction store backing this rule, if any
    # @param measures any remaining keywords are registered through set_measure
    def initialize(lhs:,rhs:,tx_store: nil,**measures)
      self.lhs = lhs.is_a?(Array) ? lhs.sort : [lhs]
      self.rhs = rhs.is_a?(Array) ? rhs.sort : [rhs]
      antecedent = self.lhs.join(",")
      consequent = self.rhs.join(",")
      self.name = "#{antecedent} -> #{consequent}"
      self.tx_store = tx_store
      measures.each_pair { |measure, value| set_measure(measure,value) }
    end

    # Orders rules by name; note the operands are swapped (other first)
    def <=>(other)
      other.name <=> self.name
    end

    # @return [String] the rule name, 'lhs -> rhs'
    def to_s
      name
    end

    # Stores the antecedent, wrapping a non-array value in an array
    def lhs=(input)
      @lhs = input.is_a?(Array) ? input : [input]
    end

    # Stores the consequent, wrapping a non-array value in an array
    def rhs=(input)
      @rhs = input.is_a?(Array) ? input : [input]
    end
  end
end
|
@@ -0,0 +1,340 @@
|
|
1
|
+
module Evoc
  ##
  # An enumerable collection of rules, optionally tagged with the query that
  # produced them and the aggregator that was used to build them.
  class RuleStore
    include Enumerable, Logging
    attr_accessor :query, :rules, :aggregator

    ##
    # @param rules [Array] the initial rules
    # @param query the query the rules were generated for, if any
    # @param aggregator the aggregator used to produce the rules, if any
    def initialize(rules = [],query: nil,aggregator: nil)
      self.rules = rules
      self.query = query
      self.aggregator = aggregator
    end

    ##
    # CLASS METHODS
    ##

    ##
    # @param path_to_rules [String] path to a csv file with a header row
    # @return [Evoc::RuleStore] one rule per csv row
    def self.parse_file(path_to_rules)
      rule_store = Evoc::RuleStore.new
      CSV.foreach(path_to_rules, :headers => true) do |row|
        rule_store << rule_from_csv_row(row)
      end
      rule_store
    end

    ##
    # @param string [String] csv data with a header row
    # @return [Evoc::RuleStore] one rule per csv row
    def self.parse_string(string)
      rule_store = Evoc::RuleStore.new
      CSV.parse(string, :headers => true) do |row|
        rule_store << rule_from_csv_row(row)
      end
      rule_store
    end

    # Builds a rule from a csv row; every column except lhs/rhs is converted
    # with InterestingnessMeasures::VALUE_TYPE
    def self.rule_from_csv_row(row)
      params = row.to_h.symbolize_keys.convert_values(except: [:lhs,:rhs], converter: Evoc::InterestingnessMeasures::VALUE_TYPE)
      # Rule#initialize only takes keywords; the explicit double splat is
      # required on Ruby 3, where a positional Hash is no longer converted
      Evoc::Rule.new(**params)
    end
    private_class_method :rule_from_csv_row

    ##
    # \CLASS METHODS
    ##

    # required by Enumerable
    # @return [Enumerator] when called without a block
    def each(&block)
      return enum_for(:each) unless block_given?
      @rules.each(&block)
    end

    def [](index)
      @rules[index]
    end

    def []=(index,value)
      @rules[index] = value
    end

    ##
    # Groups the rules by the value returned from the block
    # @return [Hash] block value => Array of rules (unknown keys yield a new empty array)
    def group_by(&block)
      return enum_for(:group_by) unless block
      res = Hash.new { |hash, key| hash[key] = [] }
      each do |e|
        res[block.call(e)] << e
      end
      res
    end

    ##
    # Calculates the requested measures on the current rule set
    # @param measures [Array<Symbol>] the set of measures to calculate
    # @raise [ArgumentError] if measures is nil
    def calculate_measures(measures)
      if measures.nil?
        raise ArgumentError, "Tried to calculate measures, but list of measures was 'nil'"
      end
      self.each do |rule|
        measures.each do |m|
          # called for its side effect of calculating the measure
          rule.get_measure(m)
        end
      end
    end

    ##
    # Aggregates the current set of rules using the given aggregator over the rule clusters specified by the given block
    #
    # @param aggregator [Symbol] the name of a defined aggregator function
    # @param measures [Array<String>] the measures to aggregate
    # @yield [rule] returns the key of the cluster the rule belongs to
    # @return [Evoc::RuleStore] one hyper rule per cluster with more than one
    #   rule; rules that are alone in their cluster are passed through unchanged
    def aggregate_by(aggregator: ,measures:,&block)
      rule_clusters = group_by(&block)
      aggregation = Evoc::RuleStore.new(query: self.query, aggregator: aggregator)
      # aggregate the rules in each cluster
      rule_clusters.each do |_,cluster|
        # we create one aggregated rule from each rule cluster
        if cluster.size > 1
          aggregation << Evoc::HyperRule.new(cluster,aggregator,measures)
        else
          aggregation << cluster.first
        end
      end
      return aggregation
    end

    ##
    # @return the number of hyper rules in this store
    def number_of_hyper_rules
      self.hyper_rules.size
    end

    ##
    # @return the hyper rules in the store
    def hyper_rules
      self.select {|r| r.is_a?(Evoc::HyperRule)}
    end

    ##
    # Evaluate this recommendation using the given evaluator
    #
    # Note that the hyper coefficient is added as a last tie breaker for
    # aggregation functions called with '*aggregator*_hc'
    # Not pretty, sorry..
    #
    # @param [String] evaluator the method to use for evaluating
    # @param [Array] expected_outcome the list of items to evaluate against
    # @param [Array] measure_combination the list of measures used to first sort the recommendation
    # @return [Hash] the evaluation result under the key :value
    # @raise [ArgumentError] if measure_combination is empty
    def evaluate_with(evaluator: :average_precision,expected_outcome:,measure_combination: )
      if measure_combination.empty? then raise ArgumentError, "Cannot evalute a recommendation without specifying which measures to rank on" end
      result = Hash.new
      logger.debug "#{__method__} params: evaluator: #{evaluator}, measure_combination: #{measure_combination}"
      # sort the rules on the measure combination
      sorted_rules =
        if self.aggregator =~ /_hc\z/
          self.sort_on(measures: measure_combination+['m_hyper_coefficient'])
        elsif !self.aggregator.nil?
          self.sort_on(measures: measure_combination)
        else
          # not an aggregation
          # get the strongest unique rules
          unique_rules = self.unique_by(measure_combination.first)
          self.sort_on(rules: unique_rules,measures: measure_combination)
        end
      # get the recommended items
      recommendation = sorted_rules.map(&:rhs)
      # evaluate the sorted list against the expected outcome
      result[:value] = Evoc::Evaluate.execute(recommendation,expected_outcome,evaluator)
      return result
    end

    ##
    # Sort rules on one or more measures, strongest first.
    # Measures are negated so that the ascending sort puts the largest
    # first; a measure whose value is nil gets the key +infinity, i.e. the
    # rule is ranked last on that measure.
    # @param rules the rules to sort, defaults to the rules in this store
    # @param measures [Array<String>] the list of measures to sort by
    def sort_on(rules: self, measures:)
      rules.sort_by do |r|
        measures.map do |m|
          measure = r.get_measure(m)
          measure.value.nil? ? Float::INFINITY : -measure
        end
      end
    end

    ##
    # returns one rule per unique consequent (keyed on the first rhs item),
    # namely the rule that is strongest according to the input measure.
    # Rules where the measure is nil are ignored; on ties the first rule wins.
    #
    # @param measure [String] the measure used to find the strongest rules
    def unique_by(measure)
      selected_rules = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new
      self.each do |rule|
        value = rule.get_measure(measure).value
        next if value.nil?
        key = rule.rhs.first
        current = selected_rules[key]
        next unless current.nil? || value > current[:value]
        entry = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseRubyToRuby.new : Hash.new
        entry[:value] = value
        entry[:rule] = rule
        selected_rules[key] = entry
      end
      return selected_rules.values.map {|entry| entry[:rule]}
    end

    ##
    # @return [Integer] the size of the largest rule, measures by antecedent size
    def largest_antecedent
      self.map {|r| r.lhs.size}.max
    end

    ##
    # @return [True/False/Nil] if the lhs of one of the rules is equal to the query,
    #   nil when no query is set
    def exact_match
      if self.query.nil?
        logger.debug "Tried to calculate exact match, but query was nil "
        return nil
      end
      wanted = self.query.sort
      self.any? {|rule| rule.lhs.sort == wanted}
    end

    def <<(rule)
      self.rules << rule
    end

    ##
    # @return [String] csv with the rule names and the measures that are
    #   instantiated on the rules (on the hyper rules for an aggregated store)
    def pretty_print
      CSV.generate do |csv|
        # write header
        source = aggregator.nil? ? self : self.hyper_rules
        defined_measures = source.map {|r| r.instantiated_measures}.array_union
        csv << ['rule'] + defined_measures
        self.each do |rule|
          csv << [rule.name] + defined_measures.map {|m| rule.get_measure(m).value}
        end
      end
    end

    ##
    # @return [String] csv with lhs, rhs and all measures of Evoc::Rule;
    #   measures that are not instantiated on a rule are left empty
    def to_s
      CSV.generate do |csv|
        # write header
        csv << ['lhs','rhs'] + Evoc::Rule.measures
        self.each do |rule|
          lhs = rule.lhs.respond_to?(:join) ? rule.lhs.join(',') : rule.lhs
          rhs = rule.rhs.respond_to?(:join) ? rule.rhs.join(',') : rule.rhs
          csv << [lhs, rhs] + instantiated_values(rule, Evoc::Rule.measures)
        end
      end
    end

    ##
    # Writes the rules as csv to standard output, sorted on the given
    # measures, with items translated through the int_2_name mapping of the
    # first rule's tx_store.
    # @param measures [Array<String>] the measures to include. Default is all measures.
    def print(measures = Evoc::Rule.measures)
      CSV do |out|
        out << ['lhs','rhs'] + measures
        if self.size > 0
          name_mapping = self.first.tx_store.int_2_name
          self.sort_on(measures: measures).each do |rule|
            lhs = rule.lhs.map{|i| name_mapping[i]}.join(',')
            rhs = rule.rhs.map{|i| name_mapping[i]}.join(',')
            out << [lhs, rhs] + instantiated_values(rule, measures)
          end
        end
      end
      nil
    end

    ##
    # Print the current rule set to a csv file
    # @param measures [Array<String>] the measures to include in output. Default is all measures.
    # @param file [String] the file to write to.
    def print_to_file(measures: Evoc::Rule.measures, file:)
      CSV.open(file, "wb") do |csv|
        # write header
        csv << ['lhs','rhs'] + measures
        self.each do |rule|
          csv << [rule.lhs.join(','), rule.rhs.join(',')] + instantiated_values(rule, measures)
        end
      end
    end

    ##
    # Two stores are equal when their rules have the same lhs, rhs and
    # measure values, in the same order.
    def ==(other)
      return false unless other.respond_to?(:map)
      self.map(&:lhs) == other.map(&:lhs) &&
        self.map(&:rhs) == other.map(&:rhs) &&
        # all? (not each, which returns the always-truthy receiver) so that
        # differing measures actually make the stores unequal
        Evoc::Rule.measures.all? do |m|
          self.map {|r| r.get_measure(m).value} == other.map {|r| r.get_measure(m).value}
        end
    end

    def size
      self.rules.size
    end

    def empty?
      self.rules.empty?
    end

    def clear
      self.rules.clear
    end

    ##
    # @return [Array<Hash>] one hash per rule with lhs, rhs and the
    #   instantiated measures (as rationals); an empty Hash when rules is nil
    def to_h
      return {} if self.rules.nil?
      self.rules.map do |r|
        h = Hash.new
        h[:lhs] = r.lhs
        h[:rhs] = r.rhs
        r.instantiated_measures.each {|m| h[m] = r.get_measure(m).value.to_r}
        h
      end
    end

    # The instance values of this store, without the rules themselves
    def instance_values_for_csv
      dont_include = ['rules']
      self.instance_values.reject {|k,_| dont_include.include?(k)}
    end

    ##
    # generate an array suitable for a csv header
    def csv_header
      self.instance_values_for_csv.keys
    end

    ##
    # generate an array of the current values of <self>
    # converts any array values to a comma separated string representation
    def to_csv_row
      self.instance_values_for_csv.values.map {|val| val.is_a?(Array) ? val.join(',') : val}
    end

    private

    # The values of the given measures on rule, nil for measures that have
    # not been instantiated on it
    def instantiated_values(rule, measures)
      measures.map {|m| rule.measure_instantiated?(m) ? rule.get_measure(m).value : nil}
    end
  end
end
|