evoc 3.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/Makefile +4 -0
- data/README.md +61 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/evoc +3 -0
- data/bin/setup +7 -0
- data/evoc.gemspec +30 -0
- data/lib/evoc/algorithm.rb +147 -0
- data/lib/evoc/algorithms/top_k.rb +86 -0
- data/lib/evoc/analyze.rb +395 -0
- data/lib/evoc/array.rb +43 -0
- data/lib/evoc/evaluate.rb +109 -0
- data/lib/evoc/exceptions/aggregation_error.rb +6 -0
- data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
- data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
- data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_result.rb +6 -0
- data/lib/evoc/exceptions/non_finite.rb +8 -0
- data/lib/evoc/exceptions/non_numeric.rb +8 -0
- data/lib/evoc/exceptions/not_a_query.rb +6 -0
- data/lib/evoc/exceptions/not_a_result.rb +6 -0
- data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
- data/lib/evoc/exceptions/not_initialized.rb +6 -0
- data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
- data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
- data/lib/evoc/experiment.rb +239 -0
- data/lib/evoc/hash.rb +56 -0
- data/lib/evoc/history_store.rb +53 -0
- data/lib/evoc/hyper_rule.rb +53 -0
- data/lib/evoc/interestingness_measure.rb +77 -0
- data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
- data/lib/evoc/interestingness_measures.rb +882 -0
- data/lib/evoc/logger.rb +34 -0
- data/lib/evoc/memory_profiler.rb +43 -0
- data/lib/evoc/recommendation_cache.rb +152 -0
- data/lib/evoc/rule.rb +32 -0
- data/lib/evoc/rule_store.rb +340 -0
- data/lib/evoc/scenario.rb +303 -0
- data/lib/evoc/svd.rb +124 -0
- data/lib/evoc/tx.rb +34 -0
- data/lib/evoc/tx_store.rb +379 -0
- data/lib/evoc/version.rb +3 -0
- data/lib/evoc.rb +4 -0
- data/lib/evoc_cli/analyze.rb +198 -0
- data/lib/evoc_cli/cli_helper.rb +1 -0
- data/lib/evoc_cli/experiment.rb +78 -0
- data/lib/evoc_cli/info.rb +22 -0
- data/lib/evoc_cli/main.rb +29 -0
- data/lib/evoc_cli/util.rb +36 -0
- data/lib/evoc_helper.rb +40 -0
- data/mem_profiler/Gemfile.lock +39 -0
- data/mem_profiler/README.md +126 -0
- data/mem_profiler/createdb.rb +4 -0
- data/mem_profiler/db.rb +82 -0
- data/mem_profiler/gemfile +6 -0
- data/mem_profiler/gencsv.rb +64 -0
- data/mem_profiler/genimport.sh +8 -0
- data/mem_profiler/graph.rb +91 -0
- metadata +251 -0
@@ -0,0 +1,303 @@
|
|
1
|
+
module Evoc
  ##
  # A Scenario bundles the parameters of one recommendation run: the
  # transaction the query is drawn from, the query itself, the slice of
  # history used as model, and the algorithm / measures / aggregator to use.
  #
  # Scenarios are ordered by transaction id, then by (sorted) query, then by
  # model size.
  class Scenario
    include Comparable, Logging

    attr_accessor :case_id,
                  :granularity,
                  :scenario_id,
                  :tx,
                  :algorithm,
                  :measures,
                  :aggregator,
                  :tx_index,
                  :tx_id,
                  :permutation,
                  :query,
                  :model_size,
                  :model_age,
                  :max_size,
                  :opts

    ##
    # Every key in +opts+ is assigned through the writer of the same name,
    # so an unknown key raises NoMethodError.
    #
    # @param opts [Hash] attribute name => value
    def initialize(opts = Hash.new)
      logger.debug "Initialized new scenario with configuration: #{opts}"
      self.opts = opts
      self.scenario_id = opts.hash

      # model_size depends on model_age, so set model_age first
      self.model_age = opts[:model_age]
      opts.each do |attribute, value|
        # public_send: option keys must never reach private methods
        public_send("#{attribute}=", value)
      end
    end

    ##
    # <=> defines how to compare two Scenario objects
    #
    # Compares the tx id first, then the sorted query, and finally the
    # model size.
    #
    # @return [Integer, nil] nil when +other+ is not a Scenario
    def <=>(other)
      return nil unless other.is_a?(Scenario)

      tx_order = (tx_id <=> other.tx_id)
      return tx_order unless tx_order == 0

      query_order = (query.sort <=> other.query.sort)
      return query_order unless query_order == 0

      # same transaction and same query: use history size as comparator
      model_size <=> other.model_size
    end

    ##
    # Executes a query given the current parameters
    # This results in a set of association rules, i.e., a recommendation
    #
    # Producing a recommendation is done through the following process:
    #
    # 1. Generate rules using a mining algorithm on the specified history
    # 2. Calculate interestingness measures on the generated rules
    # (optional) 3. Aggregate rules to further improve recommendation
    # (optional) 4. Evaluate how good the recommendation is
    #
    # @param evaluators [Array] evaluators to run; none are run when empty
    # @return [Hash] containing the query + scenario + recommendation + other metadata
    def call(evaluators: [])
      # generate recommendation in cache (generate rules + measures on rules)
      recommendation

      # evaluate if requested
      unless evaluators.empty?
        Evoc::RecommendationCache.evaluate(evaluators: evaluators,
                                           expected_outcome: expected_outcome,
                                           measure_combination: measures)
      end

      # build return hash
      cached = Evoc::RecommendationCache.to_h(measures: measures)
      to_h.merge(cached)
    end

    ##
    # @return [Hash] field name (String) => current value; Array values are
    #   joined into a comma separated string
    def to_h
      fields = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
      fields.each_with_object({}) do |key, hash|
        value = public_send(key)
        hash[key] = value.is_a?(Array) ? value.join(',') : value
      end
    end

    ##
    # Fetches the recommendation for this scenario through the
    # recommendation cache
    def recommendation
      Evoc::RecommendationCache.get_recommendation(algorithm: algorithm,
                                                   query: query,
                                                   model_start: model_start,
                                                   model_end: model_end,
                                                   max_size: max_size,
                                                   aggregator: aggregator,
                                                   measures: measures)
    end

    ##
    # Asks the recommendation cache whether a recommendation for this
    # scenario is already cached
    def recommendation?
      Evoc::RecommendationCache.recommendation_cached?(algorithm: algorithm,
                                                       query: query,
                                                       model_start: model_start,
                                                       model_end: model_end,
                                                       max_size: max_size)
    end

    ##
    # @return [String] string form of the options this scenario was built from
    def to_s
      opts.to_s
    end

    ##
    # CUSTOM SETTERS AND GETTERS
    #

    ##
    # Looked up from the base history by tx_id on every call; a value
    # assigned through tx= is not consulted here.
    #
    # @return [Evoc::Tx] the transaction of this scenario, nil when no tx_id is set
    def tx
      return nil if tx_id.nil?

      Evoc::HistoryStore.base_history.get_tx(id: tx_id, id_type: :id)
    end

    ##
    # @return [Integer] the size of the query
    def query_size
      query.size
    end

    ##
    # @return [Float] the percentage of the whole history that is used in the model
    #   (100 when the model size is 0)
    def model_percentage
      model_size.to_i == 0 ? 100 : ((model_size.to_f / tx_index) * 100).round(2)
    end

    ##
    # Sets the model size in this scenario
    #
    # If set to 0 (zero), the maximum possible size is calculated
    # In this case, model age will be taken into account
    #
    # @param [Integer] the size of the model
    def model_size=(size)
      requested = size.to_i
      @model_size =
        if requested == 0
          model_age.nil? ? tx_index : tx_index - model_age
        else
          requested
        end
    end

    ##
    # @return [Integer] the index in the history where the model starts
    # @raise [ArgumentError] if the computed index is negative
    def model_start
      value = tx_index - model_size.to_i - model_age.to_i
      if value < 0
        raise ArgumentError, "The model start index was negative (model_size:#{model_size}, tx_index:#{tx_index})"
      end

      value
    end

    ##
    # @return [Integer] the index in the history where the model ends
    # @raise [ArgumentError] if the index is negative or lies before the model start
    def model_end
      value = (tx_index - 1 - model_age.to_i)
      if value < 0
        raise ArgumentError, "The model end index was negative (model_size:#{model_size}, tx_index:#{tx_index})"
      end

      start = model_start
      if value < start
        raise ArgumentError, "The model end was before the model start (start: #{start}, end: #{value})"
      end

      value
    end

    ##
    # NOTE(review): get_tx is called without id_type here, unlike in #tx;
    # presumably the default looks up by history index - confirm.
    #
    # @return [Integer] the time between the first and last transaction in the model
    def model_hours
      model_end_tx   = Evoc::HistoryStore.base_history.get_tx(id: model_end)
      model_start_tx = Evoc::HistoryStore.base_history.get_tx(id: model_start)
      TimeDifference.between(model_start_tx.date, model_end_tx.date).in_hours
    end

    ##
    # @return [Integer] the query size as a rounded percentage of
    #   query size + expected outcome size
    def query_percentage
      total_items = query.size.to_i + expected_outcome.size
      (query.size.to_f / total_items * 100).round
    end

    ##
    # Stores +value+ converted with to_i; a nil value leaves max_size untouched
    #
    # @raise [ArgumentError] if +value+ does not respond to to_i
    def max_size=(value)
      return if value.nil?

      unless value.respond_to?(:to_i)
        raise ArgumentError, "value for max_size could not be converted to integer, value was: #{value}"
      end

      @max_size = value.to_i
    end

    ##
    # custom setter for query
    #
    # Stores an integer copy of +query+ (the caller's array is left alone);
    # a nil query is ignored.
    #
    # @raise [ArgumentError] if the query is empty
    def query=(query)
      return if query.nil?

      # convert to list of integers
      @query = query.map(&:to_i)

      current_tx = tx
      if !current_tx.nil? && @query.size >= current_tx.size
        # Kernel#warn is private and cannot be called on $stderr, so write directly
        $stderr.puts "The query was larger than or equal to the size of the transaction (Qs: #{@query.size}, Tx size: #{current_tx.size})"
      end

      raise ArgumentError, "The query was empty" if @query.empty?
    end

    ##
    # Custom setter for measures
    #
    # Accepts a comma separated String or a list of names (Strings or
    # Symbols); a nil value is ignored.
    def measures=(measures)
      return if measures.nil?

      # internally, all interestingness measures are
      # represented as :m_measurename (symbols), so we concatenate
      # 'm_' and symbolize the list of given measures
      measures = measures.split(',') if measures.is_a?(String)
      @measures = measures.map do |measure|
        name = measure.to_s
        (name.start_with?('m_') ? name : "m_#{name}").to_sym
      end
    end

    ##
    # @return [Array] the list of expected items (transaction items not in the query)
    def expected_outcome
      outcome = (tx.items - query)
      logger.warn "The expected outcome was empty" if outcome.empty?
      outcome
    end

    ##
    # @return [Integer] the size of the expected outcome
    def expected_outcome_size
      expected_outcome.size
    end

    ##
    # @return [String] the history index of this scenarios transaction
    def tx_index
      tx.index
    end

    ##
    # @return [Integer] the size of this scenarios transaction
    def tx_size
      tx.size
    end

    ##
    # @return [Hash] the instance values of this scenario, without the
    #   entries that should not end up in csv output
    def instance_values_for_csv
      dont_include = ['opts', 'logger', 'time', 'filtered_model_size']
      instance_values.delete_if { |key, _value| dont_include.include?(key) }
    end

    ##
    # generate an array suitable for a csv header
    def csv_header
      scenario_columns = instance_values_for_csv.keys
      rule_columns = recommendation? ? recommendation.csv_header : []
      rule_columns + scenario_columns
    end

    ##
    # generate an array of the current values of <self>
    # converts any array values to a comma separated string representation
    def to_csv_row
      scenario_values = instance_values_for_csv.values.map { |val| val.is_a?(Array) ? val.join(',') : val }
      rule_values = recommendation? ? recommendation.to_csv_row : []
      rule_values + scenario_values
    end

    ##
    # Prints the rules to standard out
    # sorted by strength
    #
    # Prints an empty line when no recommendation is cached
    def print
      if recommendation?
        recommendation.print(measures)
      else
        $stdout.puts ""
      end
    end
  end
end
|
data/lib/evoc/svd.rb
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
module Evoc
  ##
  # CLASS SVD
  #
  # Builds a co-change matrix from a transaction store and keeps the result
  # of its singular value decomposition.
  #
  # public fields:
  #   co_change_matrix, u, s, v
  #
  # index lookups (see #index2item / #item2index):
  #   hash of {index -> file}, where file is at "index" in the co_change_matrix and svd matrix
  class SVD
    attr_accessor :co_change_matrix, :u, :s, :v

    ##
    # @param tx_store [Evoc::TxStore, nil] when given, the co-change matrix
    #   and its decomposition are computed immediately; anything else leaves
    #   the matrices unset
    def initialize(tx_store = nil)
      # start with empty lookup tables so the accessors below are usable
      # on an instance built without a tx store
      @index2item = {}
      @item2index = {}
      if tx_store.is_a?(Evoc::TxStore)
        @index2item, @item2index, @co_change_matrix = txstore_2_co_change_matrix(tx_store)
        @u, @s, @v = svd(@co_change_matrix)
      end
    end

    ##
    # @return [Array] all matrix indexes that have an item assigned
    def indexes
      @index2item.keys
    end

    ##
    # @return [Array] all items that have a matrix index
    def items
      @item2index.keys
    end

    ##
    # @return the item stored at +index+, nil if there is none
    def index2item(index)
      @index2item[index]
    end

    ##
    # @return the matrix index of +item+, nil if the item is unknown
    def item2index(item)
      @item2index[item]
    end

    ##
    # @return whatever +co_change_matrix.gesvd+ returns (assigned to u, s, v
    #   by the constructor)
    def svd(co_change_matrix)
      co_change_matrix.gesvd
    end

    ##
    # Find the clusters in the current svd given a change-vector/query
    #
    # threshold: the absolute value an element of the U matrix must exceed
    #            to be considered as part of an cluster
    #
    # @return [Array] [perfect_match, clusters] where clusters maps each
    #   column index to its :pos / :neg members, split in :query_match
    #   (items from the query) and :clustered ([item, value] pairs), and
    #   perfect_match lists the [column index, sign] pairs whose
    #   query_match reached the size of the query
    def clusters(query, threshold = 0)
      clusters = Hash.new
      perfect_match = []
      # items unknown to the matrix have no index and are dropped
      query_indexes = query.map { |q_item| item2index(q_item) }.compact
      col_index = 0
      u.each_column do |col|
        # initiate cluster
        cluster = { pos: { query_match: [], clustered: [] },
                    neg: { query_match: [], clustered: [] } }
        clusters[col_index] = cluster
        col.each_with_index do |row_item, row_index|
          # check that the row item is part of cluster
          next unless row_item.abs > threshold

          side = cluster[row_item > 0 ? :pos : :neg]
          if query_indexes.include?(row_index)
            side[:query_match] << index2item(row_index)
            # every query item is now matched on this side (perfect match)
            if side[:query_match].size == query.size
              perfect_match << [col_index, row_item > 0 ? :pos : :neg]
            end
          else
            side[:clustered] << [index2item(row_index), row_item]
          end
        end
        col_index += 1
      end
      [perfect_match, clusters]
    end

    ##
    # Returns the co-change matrix of currently loaded files
    # each column/row combination specifies how many times the
    # two files changed together
    #
    #       f1 f2 f3
    #   f1  2  1  1
    #   f2  1  3  1
    #   f3  1  1  1
    #
    # @return [Array] [index2item, item2index, co_change_matrix]; indexes are
    #   handed out in order of first appearance of each item
    def txstore_2_co_change_matrix(tx_store)
      # item => { co-changed item => number of shared transactions }
      co_changes = Hash.new { |hash, item| hash[item] = Hash.new(0) }
      tx_store.each do |tx|
        tx.items.each do |file_id|
          counts = co_changes[file_id]
          tx.items.each { |co_changed_file_id| counts[co_changed_file_id] += 1 }
        end
      end

      # add indexes
      item2index = {}
      co_changes.each_key.with_index { |item, index| item2index[item] = index }
      index2item = item2index.invert

      # Generate the 2 wise dependency weight array
      co_change_matrix = NMatrix.new(co_changes.size, 0, dtype: :float64)
      co_changes.each do |item, counts|
        this_file = item2index[item]
        counts.each do |co_changed_file, sum_co_changes|
          co_change_matrix[item2index[co_changed_file], this_file] = sum_co_changes
        end
      end

      [index2item, item2index, co_change_matrix]
    end
  end
end
|
data/lib/evoc/tx.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
module Evoc
  ##
  # A single transaction: an id, an optional date, the items it contains
  # and its index. Transactions are ordered by index.
  class Tx
    include Comparable
    attr_reader :id, :date
    attr_accessor :items, :index

    ##
    # @param index the index of the transaction (optional)
    # @param id the identifier of the transaction
    # @param date the date of the transaction (optional)
    # @param items the items of the transaction
    def initialize(index: nil, id:, date: nil, items:)
      @index = index
      @id = id
      @date = date
      @items = items
    end

    ##
    # @return [Integer] the number of items in the transaction
    def size
      items.size
    end

    ##
    # Orders transactions by index.
    #
    # @return [Integer, nil] nil when +other+ is not a Tx, so that
    #   Comparable#== answers false instead of raising NoMethodError
    def <=>(other)
      return nil unless other.is_a?(Tx)

      index <=> other.index
    end

    ##
    # @return [Integer] the index converted with to_i
    def to_i
      index.to_i
    end

    ##
    # @return [String] the id converted with to_s
    def to_s
      id.to_s
    end

    ##
    # @return [Array] the items converted with to_a
    def to_a
      items.to_a
    end
  end
end
|