evoc 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +4 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +21 -0
  7. data/Makefile +4 -0
  8. data/README.md +61 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/evoc +3 -0
  12. data/bin/setup +7 -0
  13. data/evoc.gemspec +30 -0
  14. data/lib/evoc/algorithm.rb +147 -0
  15. data/lib/evoc/algorithms/top_k.rb +86 -0
  16. data/lib/evoc/analyze.rb +395 -0
  17. data/lib/evoc/array.rb +43 -0
  18. data/lib/evoc/evaluate.rb +109 -0
  19. data/lib/evoc/exceptions/aggregation_error.rb +6 -0
  20. data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
  21. data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
  22. data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
  23. data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
  24. data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
  25. data/lib/evoc/exceptions/no_result.rb +6 -0
  26. data/lib/evoc/exceptions/non_finite.rb +8 -0
  27. data/lib/evoc/exceptions/non_numeric.rb +8 -0
  28. data/lib/evoc/exceptions/not_a_query.rb +6 -0
  29. data/lib/evoc/exceptions/not_a_result.rb +6 -0
  30. data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
  31. data/lib/evoc/exceptions/not_initialized.rb +6 -0
  32. data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
  33. data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
  34. data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
  35. data/lib/evoc/experiment.rb +239 -0
  36. data/lib/evoc/hash.rb +56 -0
  37. data/lib/evoc/history_store.rb +53 -0
  38. data/lib/evoc/hyper_rule.rb +53 -0
  39. data/lib/evoc/interestingness_measure.rb +77 -0
  40. data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
  41. data/lib/evoc/interestingness_measures.rb +882 -0
  42. data/lib/evoc/logger.rb +34 -0
  43. data/lib/evoc/memory_profiler.rb +43 -0
  44. data/lib/evoc/recommendation_cache.rb +152 -0
  45. data/lib/evoc/rule.rb +32 -0
  46. data/lib/evoc/rule_store.rb +340 -0
  47. data/lib/evoc/scenario.rb +303 -0
  48. data/lib/evoc/svd.rb +124 -0
  49. data/lib/evoc/tx.rb +34 -0
  50. data/lib/evoc/tx_store.rb +379 -0
  51. data/lib/evoc/version.rb +3 -0
  52. data/lib/evoc.rb +4 -0
  53. data/lib/evoc_cli/analyze.rb +198 -0
  54. data/lib/evoc_cli/cli_helper.rb +1 -0
  55. data/lib/evoc_cli/experiment.rb +78 -0
  56. data/lib/evoc_cli/info.rb +22 -0
  57. data/lib/evoc_cli/main.rb +29 -0
  58. data/lib/evoc_cli/util.rb +36 -0
  59. data/lib/evoc_helper.rb +40 -0
  60. data/mem_profiler/Gemfile.lock +39 -0
  61. data/mem_profiler/README.md +126 -0
  62. data/mem_profiler/createdb.rb +4 -0
  63. data/mem_profiler/db.rb +82 -0
  64. data/mem_profiler/gemfile +6 -0
  65. data/mem_profiler/gencsv.rb +64 -0
  66. data/mem_profiler/genimport.sh +8 -0
  67. data/mem_profiler/graph.rb +91 -0
  68. metadata +251 -0
@@ -0,0 +1,303 @@
1
module Evoc
  ##
  # A Scenario bundles everything needed to produce one recommendation:
  # the transaction a query is taken from, the query itself, the mining
  # algorithm, the interestingness measures, an optional aggregator and the
  # window of history (model size/age) the rules are mined from.
  #
  # Recommendations are generated and evaluated through
  # Evoc::RecommendationCache, transactions are looked up in
  # Evoc::HistoryStore.base_history.
  class Scenario
    include Comparable, Logging

    # fields reported by #to_h, in output order
    TO_H_FIELDS = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures).freeze

    # instance variables that are left out of the csv representation
    CSV_EXCLUDED_VALUES = ['opts', 'logger', 'time', 'filtered_model_size'].freeze

    attr_accessor :case_id,
                  :granularity,
                  :scenario_id,
                  :tx,
                  :algorithm,
                  :measures,
                  :aggregator,
                  :tx_index,
                  :tx_id,
                  :permutation,
                  :query,
                  :model_size,
                  :model_age,
                  :max_size,
                  :opts

    ##
    # Builds a scenario from a hash of options. Every key in +opts+ is
    # assigned through the setter of the same name, so each key must match
    # one of the attributes of this class.
    #
    # @param [Hash] opts attribute name => value
    def initialize(opts = {})
      logger.debug "Initialized new scenario with configuration: #{opts}"
      self.opts = opts
      self.scenario_id = opts.hash

      # model_size= reads tx_index (resolved through tx_id) and model_age,
      # so both are assigned before the generic loop, independent of the
      # key order of opts
      self.tx_id = opts[:tx_id]
      self.model_age = opts[:model_age]
      opts.each do |attribute, value|
        send("#{attribute}=", value)
      end
    end

    ##
    # <=> defines how to compare two Scenario objects
    #
    # Scenarios are ordered by tx id, then by (sorted) query, then by
    # model size.
    #
    # @return [Integer, nil] -1, 0 or 1; nil when other is not a Scenario
    #   or when one of the compared fields is not comparable
    def <=>(other)
      return nil unless other.is_a?(Scenario)

      by_tx = tx_id <=> other.tx_id
      return by_tx unless by_tx == 0

      by_query = query.sort <=> other.query.sort
      return by_query unless by_query == 0

      model_size <=> other.model_size
    end

    ##
    # Executes a query given the current parameters
    # This results in a set of association rules, i.e., a recommendation
    #
    # Producing a recommendation is done through the following process:
    #
    #             1. Generate rules using a mining algorithm on the specified history
    #             2. Calculate interestingness measures on the generated rules
    #  (optional) 3. Aggregate rules to further improve recommendation
    #  (optional) 4. Evaluate how good the recommendation is
    #
    # @param [Array] evaluators the evaluators to run; none are run when empty
    # @return [Hash] containing the query + scenario + recommendation + other metadata
    def call(evaluators: [])
      # generate recommendation in cache (generate rules + measures on rules)
      self.recommendation

      unless evaluators.empty?
        Evoc::RecommendationCache.evaluate(evaluators: evaluators,
                                           expected_outcome: expected_outcome,
                                           measure_combination: measures)
      end

      cached = Evoc::RecommendationCache.to_h(measures: measures)
      to_h.merge(cached)
    end

    ##
    # @return [Hash] the scenario fields keyed by field name (String);
    #   array values are joined into a comma separated string
    def to_h
      TO_H_FIELDS.each_with_object({}) do |field, row|
        value = public_send(field)
        row[field] = value.is_a?(Array) ? value.join(',') : value
      end
    end

    ##
    # @return the recommendation for this scenario, as provided by
    #   Evoc::RecommendationCache.get_recommendation
    def recommendation
      Evoc::RecommendationCache.get_recommendation(algorithm: algorithm,
                                                   query: query,
                                                   model_start: model_start,
                                                   model_end: model_end,
                                                   max_size: max_size,
                                                   aggregator: aggregator,
                                                   measures: measures)
    end

    ##
    # @return whatever Evoc::RecommendationCache.recommendation_cached?
    #   reports for the parameters of this scenario
    def recommendation?
      Evoc::RecommendationCache.recommendation_cached?(algorithm: algorithm,
                                                       query: query,
                                                       model_start: model_start,
                                                       model_end: model_end,
                                                       max_size: max_size)
    end

    ##
    # NOTE(review): this returns the options hash itself, not a String, so
    # string interpolation falls back to the default object representation.
    # Kept as is because callers may rely on receiving the hash - confirm.
    def to_s
      opts
    end

    ##
    # CUSTOM SETTERS AND GETTERS
    #

    ##
    # @return [Evoc::Tx] the transaction of this scenario, nil if no tx id is set
    def tx
      return nil if tx_id.nil?

      Evoc::HistoryStore.base_history.get_tx(id: tx_id, id_type: :id)
    end

    ##
    # @return [Integer] the size of the query
    def query_size
      query.size
    end

    ##
    # @return [Float] the percentage of the whole history that is used in the model
    def model_percentage
      model_size.to_i == 0 ? 100 : ((model_size.to_f / tx_index) * 100).round(2)
    end

    ##
    # Sets the model size in this scenario
    #
    # If set to 0 (zero), the maximum possible size is calculated
    # In this case, model age will be taken into account
    #
    # @param [Integer] the size of the model
    def model_size=(size)
      requested = size.to_i
      # model_age goes through to_i like in model_start/model_end,
      # a missing (nil) model age therefore counts as 0
      @model_size = requested.zero? ? tx_index - model_age.to_i : requested
    end

    ##
    # @return [Integer] the index in the history where the model starts
    # @raise [ArgumentError] if the calculated index is negative
    def model_start
      value = tx_index - model_size.to_i - model_age.to_i
      if value < 0
        raise ArgumentError, "The model start index was negative (model_size:#{model_size}, tx_index:#{tx_index})"
      end

      value
    end

    ##
    # @return [Integer] the index in the history where the model ends
    # @raise [ArgumentError] if the calculated index is negative or lies
    #   before the model start
    def model_end
      value = tx_index - 1 - model_age.to_i
      if value < 0
        raise ArgumentError, "The model end index was negative (model_size:#{model_size}, tx_index:#{tx_index})"
      elsif value < model_start
        raise ArgumentError, "The model end was before the model start (start: #{model_start}, end: #{value})"
      end

      value
    end

    ##
    # @return the number of hours between the first and last transaction in the model
    #
    # NOTE(review): model_start/model_end are history indexes, but they are
    # passed as id: without an id_type (compare #tx, which passes
    # id_type: :id) - confirm that get_tx treats them as indexes by default.
    def model_hours
      history = Evoc::HistoryStore.base_history
      last_tx = history.get_tx(id: model_end)
      first_tx = history.get_tx(id: model_start)
      TimeDifference.between(first_tx.date, last_tx.date).in_hours
    end

    ##
    # @return [Integer] the size of the query as a (rounded) percentage of
    #   query size + expected outcome size
    def query_percentage
      total_items = query.size.to_i + expected_outcome.size
      (query.size.to_f / total_items * 100).round
    end

    ##
    # Custom setter for max_size, nil is ignored
    #
    # @param value anything responding to to_i
    # @raise [ArgumentError] if value cannot be converted to an integer
    def max_size=(value)
      return if value.nil?

      unless value.respond_to?(:to_i)
        raise ArgumentError, "value for max_size could not be converted to integer, value was: #{value}"
      end

      @max_size = value.to_i
    end

    ##
    # Custom setter for query, nil is ignored
    #
    # The items are converted to integers and stored in a new array, the
    # given array is left untouched.
    #
    # @param [Array] query the items of the query
    # @raise [ArgumentError] if the query is empty
    def query=(query)
      return if query.nil?

      # convert to list of integers
      @query = query.map(&:to_i)

      current_tx = tx
      if !current_tx.nil? && @query.size >= current_tx.size
        logger.warn "The query was larger than or equal to the size of the transaction (Qs: #{@query.size}, Tx size: #{current_tx.size})"
      end

      raise ArgumentError, "The query was empty" if @query.empty?
    end

    ##
    # Custom setter for measures, nil is ignored
    #
    # Internally, all interestingness measures are represented as
    # :m_measurename (symbols), so 'm_' is prepended where missing and the
    # names are symbolized.
    #
    # @param [String, Array] measures a comma separated string or a list
    #   of measure names (strings or symbols)
    def measures=(measures)
      return if measures.nil?

      measures = measures.split(',') if measures.is_a?(String)
      @measures = measures.map do |measure|
        # to_s makes symbols work, strip removes the padding of "a, b"
        name = measure.to_s.strip
        (name.start_with?('m_') ? name : "m_#{name}").to_sym
      end
    end

    ##
    # @return [Array] the list of expected items, i.e., the items of the
    #   transaction that are not part of the query
    def expected_outcome
      remaining = tx.items - query
      logger.warn "The expected outcome was empty" if remaining.empty?
      remaining
    end

    ##
    # @return [Integer] the size of the expected outcome
    def expected_outcome_size
      expected_outcome.size
    end

    ##
    # @return the history index of this scenarios transaction
    def tx_index
      tx.index
    end

    ##
    # @return [Integer] the size of this scenarios transaction
    def tx_size
      tx.size
    end

    ##
    # @return [Hash] the instance values of this scenario, without the
    #   ones that should not end up in a csv
    def instance_values_for_csv
      instance_values.reject { |name, _value| CSV_EXCLUDED_VALUES.include?(name) }
    end

    ##
    # generate an array suitable for a csv header
    def csv_header
      scenario_part = instance_values_for_csv.keys
      rule_part = recommendation? ? recommendation.csv_header : []
      rule_part + scenario_part
    end

    ##
    # generate an array of the current values of <self>
    # converts any array values to a comma separated string representation
    def to_csv_row
      scenario_part = instance_values_for_csv.values.map do |value|
        value.is_a?(Array) ? value.join(',') : value
      end
      rule_part = recommendation? ? recommendation.to_csv_row : []
      rule_part + scenario_part
    end

    ##
    # Prints the recommendation to standard out (an empty line if no
    # recommendation is cached)
    def print
      if recommendation?
        recommendation.print(measures)
      else
        $stdout.puts ""
      end
    end
  end
end
data/lib/evoc/svd.rb ADDED
@@ -0,0 +1,124 @@
1
module Evoc
  ##
  # CLASS SVD
  #
  # Builds the co-change matrix of a transaction store and decomposes it
  # through the matrix's gesvd method.
  #
  # public fields:
  #   co_change_matrix:
  #     item x item matrix of co-change counts
  #   u, s, v:
  #     the three values returned by gesvd, in that order
  class SVD
    attr_accessor :co_change_matrix, :u, :s, :v

    ##
    # @param [Evoc::TxStore] tx_store the history to decompose; for any
    #   other value (including nil) nothing is calculated and the lookup
    #   tables stay empty
    def initialize(tx_store = nil)
      # empty lookup tables keep #indexes, #items, #index2item and
      # #item2index usable when no history was given
      @index2item = {}
      @item2index = {}
      return unless tx_store.is_a?(Evoc::TxStore)

      @index2item, @item2index, @co_change_matrix = txstore_2_co_change_matrix(tx_store)
      @u, @s, @v = svd(@co_change_matrix)
    end

    ##
    # @return [Array] all row/column indexes of the co-change matrix
    def indexes
      @index2item.keys
    end

    ##
    # @return [Array] all items of the co-change matrix
    def items
      @item2index.keys
    end

    ##
    # @return the item found at the given index of the co-change matrix, nil if unknown
    def index2item(index)
      @index2item[index]
    end

    ##
    # @return the index of the given item in the co-change matrix, nil if unknown
    def item2index(item)
      @item2index[item]
    end

    ##
    # @return the result of calling gesvd on the given matrix
    def svd(co_change_matrix)
      co_change_matrix.gesvd
    end

    ##
    # Find the clusters in the current svd given a change-vector/query
    #
    # Each column of u gives one cluster, split by the sign of its
    # elements. Elements belonging to query items are collected under
    # :query_match, all others (with their value) under :clustered.
    # A [column, sign] pair is a perfect match when the number of matched
    # query items equals the size of the query.
    #
    # threshold: the minimum (absolute) value of an element in the U matrix,
    #            to be considered as part of a cluster
    #
    # @return [Array] [perfect_match, clusters]
    def clusters(query, threshold = 0)
      found = {}
      perfect_match = []
      # items without an index in the matrix are dropped
      query_indexes = query.map { |q_item| item2index(q_item) }.compact
      col_index = 0
      u.each_column do |col|
        cluster = { pos: { query_match: [], clustered: [] },
                    neg: { query_match: [], clustered: [] } }
        found[col_index] = cluster
        col.each_with_index do |weight, row_index|
          # only elements above the threshold are part of the cluster
          next unless weight.abs > threshold

          sign = weight > 0 ? :pos : :neg
          side = cluster[sign]
          if query_indexes.include?(row_index)
            side[:query_match] << index2item(row_index)
            perfect_match << [col_index, sign] if side[:query_match].size == query.size
          else
            side[:clustered] << [index2item(row_index), weight]
          end
        end
        col_index += 1
      end
      [perfect_match, found]
    end

    ##
    # Returns the co-change matrix of currently loaded files
    # each column/row combination specifies how many times the
    # two files changed together
    #
    #       f1 f2 f3
    #   f1  2  1  1
    #   f2  1  3  1
    #   f3  1  1  1
    #
    # Items get their index in the order they are first seen in the store.
    #
    # @return [Array] [index2item, item2index, co_change_matrix]
    def txstore_2_co_change_matrix(tx_store)
      # item => { co-changed item => number of shared transactions }
      co_changes = Hash.new { |hash, item| hash[item] = Hash.new(0) }
      tx_store.each do |tx|
        tx.items.each do |file_id|
          tx.items.each do |co_changed_file_id|
            co_changes[file_id][co_changed_file_id] += 1
          end
        end
      end

      item2index = {}
      co_changes.each_key.with_index { |item, index| item2index[item] = index }
      index2item = item2index.invert

      co_change_matrix = NMatrix.new(co_changes.size, 0, dtype: :float64)
      co_changes.each do |item, partners|
        column = item2index[item]
        partners.each do |co_changed_item, sum_co_changes|
          co_change_matrix[item2index[co_changed_item], column] = sum_co_changes
        end
      end
      [index2item, item2index, co_change_matrix]
    end
  end
end
data/lib/evoc/tx.rb ADDED
@@ -0,0 +1,34 @@
1
module Evoc
  ##
  # A transaction: a set of items with an id, an optional date and an
  # optional index. Transactions are ordered by their index.
  class Tx
    include Comparable
    attr_reader :id, :date
    attr_accessor :items, :index

    ##
    # @param index the position of the transaction (optional)
    # @param id the identifier of the transaction
    # @param date the date of the transaction (optional)
    # @param items the items of the transaction
    def initialize(index: nil, id:, date: nil, items:)
      @index = index
      @id = id
      @date = date
      @items = items
    end

    ##
    # @return [Integer] the number of items in the transaction
    def size
      items.size
    end

    ##
    # Compares two transactions by index
    #
    # @return [Integer, nil] the result of comparing the indexes; nil when
    #   other is not a Tx, so that == answers false instead of raising
    def <=>(other)
      return nil unless other.is_a?(Tx)

      index <=> other.index
    end

    ##
    # @return [Integer] the index converted with to_i
    def to_i
      index.to_i
    end

    ##
    # @return [String] the id converted with to_s
    def to_s
      id.to_s
    end

    ##
    # @return [Array] the items converted with to_a
    def to_a
      items.to_a
    end
  end
end