evoc 3.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +4 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +21 -0
  7. data/Makefile +4 -0
  8. data/README.md +61 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/evoc +3 -0
  12. data/bin/setup +7 -0
  13. data/evoc.gemspec +30 -0
  14. data/lib/evoc/algorithm.rb +147 -0
  15. data/lib/evoc/algorithms/top_k.rb +86 -0
  16. data/lib/evoc/analyze.rb +395 -0
  17. data/lib/evoc/array.rb +43 -0
  18. data/lib/evoc/evaluate.rb +109 -0
  19. data/lib/evoc/exceptions/aggregation_error.rb +6 -0
  20. data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
  21. data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
  22. data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
  23. data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
  24. data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
  25. data/lib/evoc/exceptions/no_result.rb +6 -0
  26. data/lib/evoc/exceptions/non_finite.rb +8 -0
  27. data/lib/evoc/exceptions/non_numeric.rb +8 -0
  28. data/lib/evoc/exceptions/not_a_query.rb +6 -0
  29. data/lib/evoc/exceptions/not_a_result.rb +6 -0
  30. data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
  31. data/lib/evoc/exceptions/not_initialized.rb +6 -0
  32. data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
  33. data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
  34. data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
  35. data/lib/evoc/experiment.rb +239 -0
  36. data/lib/evoc/hash.rb +56 -0
  37. data/lib/evoc/history_store.rb +53 -0
  38. data/lib/evoc/hyper_rule.rb +53 -0
  39. data/lib/evoc/interestingness_measure.rb +77 -0
  40. data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
  41. data/lib/evoc/interestingness_measures.rb +882 -0
  42. data/lib/evoc/logger.rb +34 -0
  43. data/lib/evoc/memory_profiler.rb +43 -0
  44. data/lib/evoc/recommendation_cache.rb +152 -0
  45. data/lib/evoc/rule.rb +32 -0
  46. data/lib/evoc/rule_store.rb +340 -0
  47. data/lib/evoc/scenario.rb +303 -0
  48. data/lib/evoc/svd.rb +124 -0
  49. data/lib/evoc/tx.rb +34 -0
  50. data/lib/evoc/tx_store.rb +379 -0
  51. data/lib/evoc/version.rb +3 -0
  52. data/lib/evoc.rb +4 -0
  53. data/lib/evoc_cli/analyze.rb +198 -0
  54. data/lib/evoc_cli/cli_helper.rb +1 -0
  55. data/lib/evoc_cli/experiment.rb +78 -0
  56. data/lib/evoc_cli/info.rb +22 -0
  57. data/lib/evoc_cli/main.rb +29 -0
  58. data/lib/evoc_cli/util.rb +36 -0
  59. data/lib/evoc_helper.rb +40 -0
  60. data/mem_profiler/Gemfile.lock +39 -0
  61. data/mem_profiler/README.md +126 -0
  62. data/mem_profiler/createdb.rb +4 -0
  63. data/mem_profiler/db.rb +82 -0
  64. data/mem_profiler/gemfile +6 -0
  65. data/mem_profiler/gencsv.rb +64 -0
  66. data/mem_profiler/genimport.sh +8 -0
  67. data/mem_profiler/graph.rb +91 -0
  68. metadata +251 -0
@@ -0,0 +1,303 @@
1
+ module Evoc
2
+ class Scenario
3
+ include Comparable, Logging
4
+
5
+ attr_accessor :case_id,
6
+ :granularity,
7
+ :scenario_id,
8
+ :tx,
9
+ :algorithm,
10
+ :measures,
11
+ :aggregator,
12
+ :tx_index,
13
+ :tx_id,
14
+ :permutation,
15
+ :query,
16
+ :model_size,
17
+ :model_age,
18
+ :max_size,
19
+ :opts
20
+
21
##
# Builds a scenario from an options hash.
#
# Every key in +opts+ is applied through the setter of the same name, so
# each key must correspond to a public writer on this object.
#
# @param opts [Hash] attribute name => value
# @raise [NoMethodError] if a key has no matching public setter
def initialize(opts = Hash.new)
  logger.debug "Initialized new scenario with configuration: #{opts}"
  self.opts = opts
  self.scenario_id = opts.hash

  # model_size= reads both model_age and tx_index, and tx_index is resolved
  # through tx_id. Seed those two first so the outcome does not depend on
  # the key order of the opts hash.
  self.model_age = opts[:model_age]
  self.tx_id = opts[:tx_id] if opts.key?(:tx_id)

  opts.each do |attribute, value|
    # public_send: option keys must never reach private methods
    public_send("#{attribute}=", value)
  end
end
32
+
33
+
34
##
# Orders two scenarios.
#
# Scenarios are compared by transaction id first, then by their sorted
# query, and finally by model size.
#
# @param other [Object] the object to compare against
# @return [Integer, nil] -1, 0 or 1; nil when +other+ is not a scenario or
#   one of the compared fields is itself incomparable
def <=>(other)
  # The previous guard tested against a stale class name (Query)
  return nil unless other.is_a?(Evoc::Scenario)

  by_tx = tx_id <=> other.tx_id
  return by_tx unless by_tx == 0

  by_query = query.sort <=> other.query.sort
  return by_query unless by_query == 0

  model_size <=> other.model_size
end
55
+
56
##
# Executes the query with the current parameters.
# The outcome is a set of association rules, i.e., a recommendation.
#
# Steps performed here:
#
# 1. Request the recommendation, which fills Evoc::RecommendationCache
# 2. (optional) Ask the cache to evaluate it when evaluators are given
# 3. Merge this scenario's fields with the cache's hash representation
#
# @param evaluators [Array] evaluators handed to the cache; evaluation is
#   skipped when empty
# @return [Hash] the fields of #to_h merged with the cache's hash
def call(evaluators: [])
  # called only for its effect on the recommendation cache
  recommendation

  unless evaluators.empty?
    Evoc::RecommendationCache.evaluate(evaluators: evaluators,
                                       expected_outcome: expected_outcome,
                                       measure_combination: measures)
  end

  cached_fields = Evoc::RecommendationCache.to_h(measures: measures)
  to_h.merge(cached_fields)
end
80
+
81
##
# Collects the reportable fields of this scenario.
#
# Each listed field is read by calling the method of the same name; Array
# values are flattened into a comma separated string.
#
# @return [Hash{String => Object}] field name => value
def to_h
  field_names = %w(case_id granularity scenario_id tx_id tx_index tx_size query_size query_percentage expected_outcome_size model_size model_hours model_age max_size algorithm aggregator measures)
  field_names.each_with_object({}) do |field, row|
    current = method(field).call
    row[field] = current.is_a?(Array) ? current.join(',') : current
  end
end
90
+
91
##
# Requests the recommendation for this scenario's parameters from
# Evoc::RecommendationCache.
#
# @return whatever Evoc::RecommendationCache.get_recommendation returns
def recommendation
  request = {
    algorithm: algorithm,
    query: query,
    model_start: model_start,
    model_end: model_end,
    max_size: max_size,
    aggregator: aggregator,
    measures: measures
  }
  Evoc::RecommendationCache.get_recommendation(**request)
end
100
+
101
##
# Asks Evoc::RecommendationCache whether a recommendation is cached for
# this scenario's algorithm, query, model range and max size.
#
# @return whatever Evoc::RecommendationCache.recommendation_cached? returns
#   (presumably a boolean)
def recommendation?
  lookup = {
    algorithm: algorithm,
    query: query,
    model_start: model_start,
    model_end: model_end,
    max_size: max_size
  }
  Evoc::RecommendationCache.recommendation_cached?(**lookup)
end
108
+
109
##
# String representation of this scenario: its configuration hash rendered
# as text.
#
# to_s has to return a String; returning the Hash itself makes string
# interpolation fall back to the default object representation.
#
# @return [String]
def to_s
  opts.to_s
end
112
+
113
+
114
+ ##
115
+ # CUSTOM SETTERS AND GETTERS
116
+ #
117
+
118
##
# Looks up this scenario's transaction in the base history by its id.
#
# @return [Evoc::Tx, nil] the transaction, or nil while no tx_id is set
def tx
  return nil if tx_id.nil?

  Evoc::HistoryStore.base_history.get_tx(id: tx_id, id_type: :id)
end
127
+
128
##
# @return [Integer] the number of items in the query
def query_size
  items_in_query = query
  items_in_query.size
end
133
+
134
##
# Share of the history preceding this transaction that the model covers.
#
# @return [Numeric] 100 when model_size converts to 0, otherwise
#   model_size / tx_index as a percentage rounded to two decimals
def model_percentage
  return 100 if model_size.to_i == 0

  fraction = model_size.to_f / tx_index
  (fraction * 100).round(2)
end
139
+
140
##
# Sets the model size of this scenario.
#
# A size that converts to 0 (including nil) selects the largest possible
# model: everything before this transaction, minus the model age.
#
# model_age is converted with to_i, as model_start and model_end do, so a
# nil or String age is handled the same way everywhere.
#
# @param size [#to_i] the requested model size
def model_size=(size)
  requested = size.to_i
  @model_size = requested.zero? ? tx_index - model_age.to_i : requested
end
154
+
155
##
# @return [Integer] the index in the history where the model starts,
#   computed as tx_index - model_size - model_age
# @raise [ArgumentError] if the computed index is negative
def model_start
  start_index = tx_index - model_size.to_i - model_age.to_i
  return start_index unless start_index < 0

  raise ArgumentError, "The model start index was negative (model_size:#{self.model_size}, tx_index:#{self.tx_index})"
end
164
+
165
##
# @return [Integer] the index in the history where the model ends,
#   computed as tx_index - 1 - model_age
# @raise [ArgumentError] if the computed index is negative or lies before
#   model_start
def model_end
  end_index = tx_index - 1 - model_age.to_i

  if end_index < 0
    raise ArgumentError, "The model end index was negative (model_size:#{self.model_size}, tx_index:#{self.tx_index})"
  end
  if end_index < model_start
    raise ArgumentError, "The model end was before the model start (start: #{self.model_start}, end: #{end_index})"
  end

  end_index
end
176
+
177
##
# Time spanned by the model, in hours.
#
# Fetches the transactions at model_end and model_start from the base
# history and measures the difference between their dates.
#
# @return the result of TimeDifference#in_hours for the two dates
def model_hours
  history = Evoc::HistoryStore.base_history
  last_tx = history.get_tx(id: model_end)
  first_tx = Evoc::HistoryStore.base_history.get_tx(id: model_start)
  TimeDifference.between(first_tx.date, last_tx.date).in_hours
end
184
+
185
##
# Share of the items (query plus expected outcome) that are part of the
# query.
#
# @return [Integer] the percentage, rounded to the nearest integer
def query_percentage
  in_query = query.size
  everything = in_query.to_i + expected_outcome.size
  (in_query.to_f / everything * 100).round
end
191
+
192
+
193
##
# Sets the max size, stored as an Integer.
#
# A nil value leaves the current setting untouched.
#
# @param value [#to_i, nil] the new max size
# @raise [ArgumentError] if the value does not respond to to_i
def max_size=(value)
  return if value.nil?

  unless value.respond_to?(:to_i)
    raise ArgumentError, "value for max_size could not be converted to integer, value was: #{value}"
  end

  @max_size = value.to_i
end
204
+
205
##
# Sets the query as a list of integers.
#
# A nil query is ignored. The given list is copied, so the caller's array
# (typically the one stored in opts) is never modified.
#
# A warning is written to standard error when the query is at least as
# large as the scenario's transaction.
#
# @param query [Array<#to_i>, nil] the items of the query
# @raise [ArgumentError] if the query is empty
def query=(query)
  return if query.nil?

  # map instead of map!: converting in place would alter the caller's array
  @query = query.map(&:to_i)
  raise ArgumentError, "The query was empty" if @query.empty?

  # tx performs a history lookup, so fetch it once
  transaction = tx
  return if transaction.nil?

  if @query.size >= transaction.size
    # Kernel#warn is private; calling it as $stderr.warn raises NoMethodError
    warn "The query was larger than or equal to the size of the transaction (Qs: #{@query.size}, Tx size: #{transaction.size})"
  end
end
222
+
223
##
# Sets the interestingness measures.
#
# Internally every measure is a symbol of the form :m_measurename, so the
# 'm_' prefix is added to any name that lacks it. Accepts a comma
# separated String or a list of Strings/Symbols; whitespace around each
# name is ignored. A nil value leaves the current measures untouched.
#
# @param measures [String, Array<String, Symbol>, nil]
def measures=(measures)
  return if measures.nil?

  names = measures.is_a?(String) ? measures.split(',') : measures
  @measures = names.map do |name|
    # to_s lets Symbols without the prefix through ('m_' + :sym raises)
    name = name.to_s.strip
    (name.start_with?('m_') ? name : "m_#{name}").to_sym
  end
end
236
+
237
##
# The items of the transaction that are not part of the query.
# Logs a warning when nothing is left.
#
# @return [Array] the list of expected items
def expected_outcome
  remaining = tx.items - query
  logger.warn "The expected outcome was empty" if remaining.empty?
  remaining
end
246
+
247
##
# @return [Integer] the number of items in the expected outcome
def expected_outcome_size
  outcome = expected_outcome
  outcome.size
end
252
+
253
##
# @return the history index of this scenario's transaction, as reported by
#   the transaction itself
def tx_index
  transaction = tx
  transaction.index
end
258
+
259
##
# @return [Integer] the size of this scenario's transaction
def tx_size
  transaction = tx
  transaction.size
end
264
+
265
##
# The instance values of this scenario with the entries that do not belong
# in a CSV export removed.
#
# @return [Hash] instance values without opts, logger, time and
#   filtered_model_size
def instance_values_for_csv
  excluded = ['opts', 'logger','time','filtered_model_size']
  instance_values.delete_if { |name, _value| excluded.include?(name) }
end
271
+
272
##
# Builds an array suitable for a CSV header: the recommendation's header
# (when a recommendation is cached) followed by the names of this
# scenario's exported instance values.
#
# @return [Array]
def csv_header
  own_columns = instance_values_for_csv.keys
  rule_columns = recommendation? ? recommendation.csv_header : []
  rule_columns + own_columns
end
279
+
280
##
# Builds a CSV row from the current values of this scenario: the
# recommendation's row (when a recommendation is cached) followed by the
# exported instance values. Array values are turned into comma separated
# strings.
#
# @return [Array]
def to_csv_row
  own_values = instance_values_for_csv.values.map do |entry|
    entry.is_a?(Array) ? entry.join(',') : entry
  end
  rule_values = recommendation? ? recommendation.to_csv_row : []
  rule_values + own_values
end
288
+
289
+
290
+
291
##
# Prints the recommendation to standard out by delegating to the
# recommendation's own print, passing the measures of this scenario.
# Prints an empty line when no recommendation is cached.
def print
  if recommendation?
    recommendation.print(measures)
  else
    $stdout.puts ""
  end
end
301
+
302
+ end
303
+ end
data/lib/evoc/svd.rb ADDED
@@ -0,0 +1,124 @@
1
+ module Evoc
2
+ ##
3
+ # CLASS SVD
4
+ #
5
+ # public fields:
6
+ # index:
7
+ # hash of {index -> file}, where file is at "index" in the co_change_matrix and svd matrix
8
+ class SVD
9
+ attr_accessor :co_change_matrix, :u, :s, :v
10
+
11
##
# Builds the co-change matrix of the given store and decomposes it.
# Nothing is computed unless an Evoc::TxStore is given.
#
# @param tx_store [Evoc::TxStore, nil] the transactions to analyse
def initialize(tx_store = nil)
  return unless tx_store.is_a?(Evoc::TxStore)

  @index2item, @item2index, @co_change_matrix = txstore_2_co_change_matrix(tx_store)
  @u, @s, @v = svd(@co_change_matrix)
end
17
+
18
##
# @return [Array] every matrix index that has an item assigned
def indexes
  known = @index2item
  known.keys
end
21
+
22
##
# @return [Array] every item that has a matrix index assigned
def items
  known = @item2index
  known.keys
end
25
+
26
##
# @param index the matrix index to look up
# @return the item stored at that index, or nil when the index is unknown
def index2item(index)
  @index2item[index]
end
29
+
30
##
# @param item the item to look up
# @return the matrix index of that item, or nil when the item is unknown
def item2index(item)
  @item2index[item]
end
33
+
34
##
# Decomposes the given matrix by calling its gesvd method.
#
# @param co_change_matrix [#gesvd] the matrix to decompose
# @return the result of gesvd (the caller destructures it into u, s, v)
def svd(co_change_matrix)
  co_change_matrix.gesvd
end
37
+
38
##
# Finds the clusters in the current svd given a change-vector/query.
#
# Every column of the U matrix yields one cluster, split by the sign of its
# elements into :pos and :neg. Elements whose absolute value does not
# exceed the threshold are skipped. Within each sign, items that are part
# of the query are collected under :query_match, all others under
# :clustered together with their value.
#
# A [column, sign] pair is recorded as a perfect match whenever the number
# of query matches on that side equals the size of the query.
#
# @param query [Array] the items of the change-vector
# @param threshold [Numeric] the minimum absolute value of an element in
#   the U matrix for it to be considered part of a cluster
# @return [Array] a pair [perfect_match, clusters]
def clusters(query, threshold = 0)
  found = {}
  perfect_match = []
  # items without a matrix index map to nil and are dropped
  query_indexes = query.map { |q_item| item2index(q_item) }.compact
  column_number = 0

  u.each_column do |column|
    cluster = { pos: { query_match: [], clustered: [] },
                neg: { query_match: [], clustered: [] } }
    found[column_number] = cluster

    column.each_with_index do |weight, row_index|
      next unless weight.abs > threshold

      sign = weight > 0 ? :pos : :neg
      side = cluster[sign]
      if query_indexes.include?(row_index)
        side[:query_match] << index2item(row_index)
        perfect_match << [column_number, sign] if side[:query_match].size == query.size
      else
        side[:clustered] << [index2item(row_index), weight]
      end
    end

    column_number += 1
  end

  [perfect_match, found]
end
73
+
74
+
75
##
# Builds the co-change matrix of the items in the given store.
# Each column/row combination specifies how many times the two items
# changed together (the diagonal counts an item's own changes):
#
#      f1 f2 f3
#   f1  2  1  1
#   f2  1  3  1
#   f3  1  1  1
#
# Items get their matrix index in order of first appearance.
#
# @param tx_store [#each] yields transactions that respond to items
# @return [Array] the triple [index2item, item2index, co_change_matrix]
def txstore_2_co_change_matrix(tx_store)
  # item => { other item => number of transactions containing both }
  co_changes = {}
  tx_store.each do |tx|
    tx.items.each do |item|
      counts = (co_changes[item] ||= Hash.new(0))
      tx.items.each { |other| counts[other] += 1 }
    end
  end

  item2index = {}
  co_changes.each_key.with_index { |item, position| item2index[item] = position }
  index2item = item2index.invert

  size = co_changes.size
  co_change_matrix = NMatrix.new(size, 0, dtype: :float64)
  co_changes.each do |item, counts|
    column = item2index[item]
    counts.each do |other, times|
      co_change_matrix[item2index[other], column] = times
    end
  end

  [index2item, item2index, co_change_matrix]
end
123
+ end
124
+ end
data/lib/evoc/tx.rb ADDED
@@ -0,0 +1,34 @@
1
module Evoc
  ##
  # A single transaction: an identified set of items that changed together,
  # optionally carrying a date and its index in a history.
  #
  # Transactions are ordered by their index.
  class Tx
    include Comparable

    attr_reader :id, :date
    attr_accessor :items, :index

    ##
    # @param index the position of the transaction in its history (optional)
    # @param id the identifier of the transaction
    # @param date the date of the transaction (optional)
    # @param items the items changed in the transaction
    def initialize(index: nil, id:, date: nil, items:)
      @index = index
      @id = id
      @date = date
      @items = items
    end

    ##
    # @return [Integer] the number of items in the transaction
    def size
      items.size
    end

    ##
    # Orders transactions by index.
    #
    # @param other [Object] the object to compare against
    # @return [Integer, nil] the comparison of the two indexes; nil when
    #   +other+ is not a transaction, so that Comparable#== answers false
    #   instead of raising
    def <=>(other)
      return nil unless other.is_a?(Tx)

      index <=> other.index
    end

    ##
    # @return [Integer] the index converted with to_i (0 when no index is set)
    def to_i
      index.to_i
    end

    ##
    # @return [String] the id as a string
    def to_s
      id.to_s
    end

    ##
    # @return [Array] the items as an array
    def to_a
      items.to_a
    end
  end
end