evoc 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/Makefile +4 -0
- data/README.md +61 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/evoc +3 -0
- data/bin/setup +7 -0
- data/evoc.gemspec +30 -0
- data/lib/evoc/algorithm.rb +147 -0
- data/lib/evoc/algorithms/top_k.rb +86 -0
- data/lib/evoc/analyze.rb +395 -0
- data/lib/evoc/array.rb +43 -0
- data/lib/evoc/evaluate.rb +109 -0
- data/lib/evoc/exceptions/aggregation_error.rb +6 -0
- data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
- data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
- data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_result.rb +6 -0
- data/lib/evoc/exceptions/non_finite.rb +8 -0
- data/lib/evoc/exceptions/non_numeric.rb +8 -0
- data/lib/evoc/exceptions/not_a_query.rb +6 -0
- data/lib/evoc/exceptions/not_a_result.rb +6 -0
- data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
- data/lib/evoc/exceptions/not_initialized.rb +6 -0
- data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
- data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
- data/lib/evoc/experiment.rb +239 -0
- data/lib/evoc/hash.rb +56 -0
- data/lib/evoc/history_store.rb +53 -0
- data/lib/evoc/hyper_rule.rb +53 -0
- data/lib/evoc/interestingness_measure.rb +77 -0
- data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
- data/lib/evoc/interestingness_measures.rb +882 -0
- data/lib/evoc/logger.rb +34 -0
- data/lib/evoc/memory_profiler.rb +43 -0
- data/lib/evoc/recommendation_cache.rb +152 -0
- data/lib/evoc/rule.rb +32 -0
- data/lib/evoc/rule_store.rb +340 -0
- data/lib/evoc/scenario.rb +303 -0
- data/lib/evoc/svd.rb +124 -0
- data/lib/evoc/tx.rb +34 -0
- data/lib/evoc/tx_store.rb +379 -0
- data/lib/evoc/version.rb +3 -0
- data/lib/evoc.rb +4 -0
- data/lib/evoc_cli/analyze.rb +198 -0
- data/lib/evoc_cli/cli_helper.rb +1 -0
- data/lib/evoc_cli/experiment.rb +78 -0
- data/lib/evoc_cli/info.rb +22 -0
- data/lib/evoc_cli/main.rb +29 -0
- data/lib/evoc_cli/util.rb +36 -0
- data/lib/evoc_helper.rb +40 -0
- data/mem_profiler/Gemfile.lock +39 -0
- data/mem_profiler/README.md +126 -0
- data/mem_profiler/createdb.rb +4 -0
- data/mem_profiler/db.rb +82 -0
- data/mem_profiler/gemfile +6 -0
- data/mem_profiler/gencsv.rb +64 -0
- data/mem_profiler/genimport.sh +8 -0
- data/mem_profiler/graph.rb +91 -0
- metadata +251 -0
@@ -0,0 +1,303 @@
|
|
1
|
+
module Evoc
|
2
|
+
class Scenario
|
3
|
+
include Comparable, Logging
|
4
|
+
|
5
|
+
attr_accessor :case_id,
|
6
|
+
:granularity,
|
7
|
+
:scenario_id,
|
8
|
+
:tx,
|
9
|
+
:algorithm,
|
10
|
+
:measures,
|
11
|
+
:aggregator,
|
12
|
+
:tx_index,
|
13
|
+
:tx_id,
|
14
|
+
:permutation,
|
15
|
+
:query,
|
16
|
+
:model_size,
|
17
|
+
:model_age,
|
18
|
+
:max_size,
|
19
|
+
:opts
|
20
|
+
|
21
|
+
##
# Builds a scenario from a configuration hash.
#
# Every key in +opts+ is assigned through the setter of the same name,
# so a key without a matching setter raises NoMethodError.
#
# @param [Hash] opts attribute name => value pairs
def initialize(opts = Hash.new)
  logger.debug "Initialized new scenario with configuration: #{opts}"
  self.opts = opts
  self.scenario_id = opts.hash

  # model_size= reads tx_index (resolved through tx_id) and model_age, and
  # query= reads tx, so seed both attributes before walking opts in
  # whatever order the caller happened to build the hash
  self.tx_id = opts[:tx_id]
  self.model_age = opts[:model_age]
  opts.each do |attribute, value|
    self.send("#{attribute}=", value)
  end
end
|
32
|
+
|
33
|
+
|
34
|
+
##
# Defines how two scenarios are ordered (used by Comparable).
#
# Scenarios are ordered by tx_id, then by their sorted query, then by
# model_size.
#
# @param [Object] other the object to compare with
# @return [Integer, nil] -1, 0 or 1; nil when other is not a Scenario
#   or when one of the underlying comparisons is undefined
def <=>(other)
  # the guard must name this class; the previous constant (Query) does not exist
  return nil unless other.is_a?(Scenario)

  by_tx = (self.tx_id <=> other.tx_id)
  return by_tx unless by_tx == 0

  by_query = (self.query.sort <=> other.query.sort)
  return by_query unless by_query == 0

  # same transaction and same query: the model size decides
  self.model_size <=> other.model_size
end
|
55
|
+
|
56
|
+
##
# Executes the query with the current parameters.
#
# Steps, as driven from here:
#
# 1. Ask for the recommendation, which makes the cache generate it
# 2. (optional) Evaluate the cached recommendation with the given evaluators
# 3. Merge the scenario fields with the cache's hash representation
#
# @param [Array] evaluators evaluators handed to the recommendation cache;
#   evaluation is skipped when empty
# @return [Hash] the scenario fields merged with the recommendation fields
def call(evaluators: [])
  # touching the recommendation is what populates the cache
  self.recommendation

  unless evaluators.empty?
    Evoc::RecommendationCache.evaluate(evaluators: evaluators,
                                       expected_outcome: self.expected_outcome,
                                       measure_combination: self.measures)
  end

  cached_fields = Evoc::RecommendationCache.to_h(measures: self.measures)
  self.to_h.merge(cached_fields)
end
|
80
|
+
|
81
|
+
##
# Flattens the scenario into a hash keyed by field name (String keys).
# Array values are joined into a comma separated string.
#
# @return [Hash] field name => current value
def to_h
  columns = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
  columns.each_with_object(Hash.new) do |column, row|
    current = self.method(column).call
    row[column] = current.is_a?(Array) ? current.join(',') : current
  end
end
|
90
|
+
|
91
|
+
##
# Fetches the recommendation for this scenario from the recommendation cache.
#
# @return whatever Evoc::RecommendationCache.get_recommendation returns
def recommendation
  lookup = {
    algorithm:   self.algorithm,
    query:       self.query,
    model_start: self.model_start,
    model_end:   self.model_end,
    max_size:    self.max_size,
    aggregator:  self.aggregator,
    measures:    self.measures
  }
  Evoc::RecommendationCache.get_recommendation(**lookup)
end
|
100
|
+
|
101
|
+
##
# Asks the recommendation cache whether a recommendation for this
# scenario's algorithm, query, model range and max_size is cached.
#
# @return whatever Evoc::RecommendationCache.recommendation_cached? returns
def recommendation?
  lookup = {
    algorithm:   self.algorithm,
    query:       self.query,
    model_start: self.model_start,
    model_end:   self.model_end,
    max_size:    self.max_size
  }
  Evoc::RecommendationCache.recommendation_cached?(**lookup)
end
|
108
|
+
|
109
|
+
##
# String form of the scenario: the configuration it was built from.
#
# @return [String] the opts hash rendered as a string
def to_s
  # to_s has to return a String; Ruby ignores a non-String result in
  # interpolation/puts and falls back to the default #<...> form
  self.opts.to_s
end
|
112
|
+
|
113
|
+
|
114
|
+
##
|
115
|
+
# CUSTOM SETTERS AND GETTERS
|
116
|
+
#
|
117
|
+
|
118
|
+
##
# Looks up this scenario's transaction by id in the base history.
#
# @return [Evoc::Tx, nil] the transaction of this scenario, nil while tx_id is unset
def tx
  return nil if self.tx_id.nil?

  Evoc::HistoryStore.base_history.get_tx(id: self.tx_id, id_type: :id)
end
|
127
|
+
|
128
|
+
##
# @return [Integer] the number of items in the query
def query_size
  query.size
end
|
133
|
+
|
134
|
+
##
# Share of the history before this transaction that the model covers.
#
# @return [Numeric] 100 when model_size is zero/unset, otherwise
#   model_size relative to tx_index as a percentage rounded to two decimals
def model_percentage
  return 100 if self.model_size.to_i == 0

  share = self.model_size.to_f / self.tx_index
  (share * 100).round(2)
end
|
139
|
+
|
140
|
+
##
# Sets the model size in this scenario.
#
# A size of 0 (or anything whose to_i is 0, such as nil) selects the
# largest possible model: everything before this transaction, minus the
# model age.
#
# @param [#to_i] size the size of the model
def model_size=(size)
  requested = size.to_i
  # model_age is coerced with to_i, exactly as model_start/model_end do, so
  # a nil age counts as 0 and a numeric string does not raise TypeError
  @model_size = requested == 0 ? self.tx_index - self.model_age.to_i : requested
end
|
154
|
+
|
155
|
+
##
# @return [Integer] the index in the history where the model starts
#   (tx_index minus model size minus model age)
# @raise [ArgumentError] when that index is negative
def model_start
  start_index = self.tx_index - self.model_size.to_i - self.model_age.to_i
  return start_index unless start_index < 0

  raise ArgumentError, "The model start index was negative (model_size:#{self.model_size}, tx_index:#{self.tx_index})"
end
|
164
|
+
|
165
|
+
##
# @return [Integer] the index in the history where the model ends
#   (the transaction just before this one, shifted back by model age)
# @raise [ArgumentError] when that index is negative or lies before model_start
def model_end
  end_index = self.tx_index - 1 - self.model_age.to_i

  if end_index < 0
    raise ArgumentError, "The model end index was negative (model_size:#{self.model_size}, tx_index:#{self.tx_index})"
  end
  if end_index < self.model_start
    raise ArgumentError, "The model end was before the model start (start: #{self.model_start}, end: #{end_index})"
  end

  end_index
end
|
176
|
+
|
177
|
+
##
# Time spanned by the model: the difference, in hours, between the dates
# of the transactions found at model_start and model_end.
#
# @return the value of TimeDifference#in_hours (presumably a number of
#   hours; confirm against the time_difference gem)
def model_hours
  history = Evoc::HistoryStore.base_history
  last_tx = history.get_tx(id: self.model_end)
  first_tx = Evoc::HistoryStore.base_history.get_tx(id: self.model_start)
  TimeDifference.between(first_tx.date, last_tx.date).in_hours
end
|
184
|
+
|
185
|
+
##
# Size of the query relative to query plus expected outcome.
#
# @return [Integer] the percentage, rounded to the nearest integer
def query_percentage
  query_items = self.query.size
  all_items = query_items.to_i + self.expected_outcome.size
  (query_items.to_f / all_items * 100).round
end
|
191
|
+
|
192
|
+
|
193
|
+
##
# Sets max_size, converting the value with to_i. A nil value is ignored
# and leaves the current max_size untouched.
#
# @param [#to_i, nil] value the new maximum size
# @raise [ArgumentError] when the value does not respond to to_i
def max_size=(value)
  return if value.nil?

  unless value.respond_to?(:to_i)
    raise ArgumentError, "value for max_size could not be converted to integer, value was: #{value}"
  end

  @max_size = value.to_i
end
|
204
|
+
|
205
|
+
##
# Sets the query, converting every item with to_i. A nil query is ignored
# and leaves the current query untouched.
#
# A note is written to standard error when the query is at least as large
# as the scenario's transaction (only checked when the transaction is known).
#
# @param [Array, nil] query the items of the query
# @raise [ArgumentError] when the query is empty
def query=(query)
  return if query.nil?

  # build a fresh list of integers; the caller's array (which is also stored
  # in opts) must not be rewritten in place
  items = query.map(&:to_i)
  # validate before assigning so a rejected query is never stored
  raise ArgumentError, "The query was empty" if items.empty?

  @query = items

  current_tx = self.tx
  if !current_tx.nil? && items.size >= current_tx.size
    # Kernel#warn is private, so it cannot be called on $stderr; write the line directly
    $stderr.puts "The query was larger than or equal to the size of the transaction (Qs: #{items.size}, Tx size: #{current_tx.size})"
  end
end
|
222
|
+
|
223
|
+
##
# Sets the interestingness measures. A nil value is ignored.
#
# Internally all measures are represented as :m_measurename symbols, so
# the 'm_' prefix is added where missing and every name is symbolized.
#
# @param [String, Array<String, Symbol>, nil] measures a comma separated
#   string or a list of measure names, with or without the 'm_' prefix
def measures=(measures)
  return if measures.nil?

  names = measures.is_a?(String) ? measures.split(',') : measures
  @measures = names.map do |name|
    # normalize to String first: 'm_' + :symbol would raise TypeError
    text = name.to_s
    (text.start_with?('m_') ? text : "m_#{text}").to_sym
  end
end
|
236
|
+
|
237
|
+
##
# The items of the transaction that are not part of the query.
# Logs a warning when nothing is left.
#
# @return [Array] the list of expected items
def expected_outcome
  remaining = self.tx.items - self.query
  logger.warn "The expected outcome was empty" if remaining.empty?
  remaining
end
|
246
|
+
|
247
|
+
##
# @return [Integer] the number of items in the expected outcome
def expected_outcome_size
  expected_outcome.size
end
|
252
|
+
|
253
|
+
##
# @return the index of this scenario's transaction, as reported by the transaction itself
def tx_index
  tx.index
end
|
258
|
+
|
259
|
+
##
# @return [Integer] the size of this scenario's transaction
def tx_size
  tx.size
end
|
264
|
+
|
265
|
+
##
# The instance values of this scenario without the entries that should
# not end up in a csv ('opts', 'logger', 'time', 'filtered_model_size').
#
# NOTE(review): instance_values is not defined in this file; presumably
# it comes from ActiveSupport and returns a fresh Hash with String keys.
#
# @return [Hash] the remaining name => value pairs
def instance_values_for_csv
  excluded = ['opts', 'logger', 'time', 'filtered_model_size']
  values = self.instance_values
  values.delete_if { |name, _value| excluded.include?(name) }
end
|
271
|
+
|
272
|
+
##
# Generates an array suitable for a csv header: the recommendation's
# header columns (only when a recommendation is cached) followed by the
# names of this scenario's csv values.
#
# @return [Array] the header columns
def csv_header
  own_columns = self.instance_values_for_csv.keys
  rule_columns = self.recommendation? ? self.recommendation.csv_header : []
  rule_columns + own_columns
end
|
279
|
+
|
280
|
+
##
# Generates a csv row with the current values of this scenario: the
# recommendation's row (only when a recommendation is cached) followed by
# the scenario's own csv values. Array values are converted to a comma
# separated string.
#
# @return [Array] the row cells
def to_csv_row
  own_cells = self.instance_values_for_csv.values.map do |cell|
    cell.is_a?(Array) ? cell.join(',') : cell
  end
  rule_cells = self.recommendation? ? self.recommendation.to_csv_row : []
  rule_cells + own_cells
end
|
288
|
+
|
289
|
+
|
290
|
+
|
291
|
+
##
# Prints the rules of the cached recommendation by delegating to the
# recommendation's own print, passing this scenario's measures.
# Prints an empty line when no recommendation is cached.
def print
  if self.recommendation?
    self.recommendation.print(measures)
  else
    $stdout.puts ""
  end
end
|
301
|
+
|
302
|
+
end
|
303
|
+
end
|
data/lib/evoc/svd.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
module Evoc
  ##
  # CLASS SVD
  #
  # Singular value decomposition of the co-change matrix of a history.
  #
  # Items are numbered in the order in which they are first seen in the
  # transaction store; that number is the item's position in
  # co_change_matrix. The numbering is available through #indexes, #items,
  # #index2item and #item2index.
  class SVD
    attr_accessor :co_change_matrix, :u, :s, :v

    ##
    # @param [Evoc::TxStore, nil] tx_store the history to decompose; with
    #   any other value the instance stays empty (no matrix, no mappings)
    def initialize(tx_store = nil)
      # start from empty mappings so that indexes/items and the lookups
      # also work on an instance that was built without a store
      @index2item = {}
      @item2index = {}
      return unless tx_store.is_a?(Evoc::TxStore)

      @index2item, @item2index, @co_change_matrix = txstore_2_co_change_matrix(tx_store)
      @u, @s, @v = svd(@co_change_matrix)
    end

    ##
    # @return [Array<Integer>] all matrix indexes in use
    def indexes
      @index2item.keys
    end

    ##
    # @return [Array] all items that have a matrix index
    def items
      @item2index.keys
    end

    ##
    # @return the item stored at the given matrix index, nil when unknown
    def index2item(index)
      @index2item[index]
    end

    ##
    # @return [Integer, nil] the matrix index of the given item, nil when unknown
    def item2index(item)
      @item2index[item]
    end

    ##
    # Decomposes the given matrix.
    #
    # @param co_change_matrix a matrix responding to gesvd
    # @return the result of gesvd, which initialize destructures into u, s and v
    def svd(co_change_matrix)
      co_change_matrix.gesvd
    end

    ##
    # Find the clusters in the current svd given a change-vector/query
    #
    # Every column of the U matrix is one cluster. Elements whose absolute
    # value exceeds the threshold belong to the cluster, on the :pos or
    # :neg side depending on their sign. Members that are part of the
    # query are collected in :query_match, all others (with their value)
    # in :clustered.
    #
    # @param [Array] query the items of the query
    # @param [Numeric] threshold the minimum absolute value of an element
    #   in the U matrix to be considered part of a cluster
    # @return [Array] a pair [perfect_match, clusters]: perfect_match lists
    #   the [column, sign] pairs whose query matches reached the size of
    #   the query, clusters maps each column index to its cluster hash
    def clusters(query, threshold = 0)
      clusters = Hash.new
      perfect_match = []
      # items that are unknown to the matrix have no index and are dropped
      query_indexes = query.map { |q_item| item2index(q_item) }.compact
      col_index = 0
      self.u.each_column do |col|
        cluster = { pos: { query_match: [], clustered: [] },
                    neg: { query_match: [], clustered: [] } }
        clusters[col_index] = cluster
        col.each_with_index do |row_item, row_index|
          # only elements above the threshold are part of the cluster
          next unless row_item.abs > threshold

          sign = row_item > 0 ? :pos : :neg
          if query_indexes.include?(row_index)
            cluster[sign][:query_match] << index2item(row_index)
            # every item of the query was found on this side of the cluster
            if cluster[sign][:query_match].size == query.size
              perfect_match << [col_index, sign]
            end
          else
            cluster[sign][:clustered] << [index2item(row_index), row_item]
          end
        end
        col_index += 1
      end
      [perfect_match, clusters]
    end

    ##
    # Returns the co-change matrix of the given transaction store.
    # Each column/row combination specifies how many times the
    # two files changed together
    #
    #       f1 f2 f3
    #   f1  2  1  1
    #   f2  1  3  1
    #   f3  1  1  1
    #
    # @param tx_store an enumerable of transactions responding to items
    # @return [Array] the triple [index2item, item2index, co_change_matrix]
    def txstore_2_co_change_matrix(tx_store)
      # item => { co-changed item => number of shared transactions };
      # an item is paired with itself too, which fills the diagonal
      co_changes = Hash.new { |counts, item| counts[item] = Hash.new(0) }
      tx_store.each do |tx|
        tx.items.each do |item|
          partners = co_changes[item]
          tx.items.each { |partner| partners[partner] += 1 }
        end
      end

      # number the items in order of first appearance
      item2index = co_changes.keys.each_with_index.to_h
      index2item = item2index.invert

      n = co_changes.size
      co_change_matrix = NMatrix.new(n, 0, dtype: :float64)
      co_changes.each do |item, partners|
        this_file = item2index[item]
        partners.each do |partner, sum_co_changes|
          co_change_matrix[item2index[partner], this_file] = sum_co_changes
        end
      end

      [index2item, item2index, co_change_matrix]
    end
  end
end
|
data/lib/evoc/tx.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
module Evoc
  ##
  # A single transaction of the history: a list of items together with
  # an id, an optional date and its index in the history.
  # Transactions are ordered by their index.
  class Tx
    include Comparable
    attr_reader :id, :date
    attr_accessor :items, :index

    ##
    # @param index the position of the transaction in the history
    # @param id the identifier of the transaction
    # @param date the date of the transaction
    # @param items the items of the transaction
    def initialize(index: nil, id:, date: nil, items:)
      @index = index
      @id = id
      @date = date
      @items = items
    end

    ##
    # @return [Integer] the number of items in the transaction
    def size
      items.size
    end

    ##
    # Orders transactions by index.
    #
    # @return [Integer, nil] the comparison of the two indexes; nil when
    #   other is not a Tx, so that == answers false instead of raising
    def <=>(other)
      return nil unless other.is_a?(Tx)

      index <=> other.index
    end

    ##
    # @return [Integer] the index converted with to_i (0 when index is nil)
    def to_i
      index.to_i
    end

    ##
    # @return [String] the id converted with to_s
    def to_s
      id.to_s
    end

    ##
    # @return [Array] the items converted with to_a
    def to_a
      items.to_a
    end
  end
end
|