evoc 3.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +4 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +21 -0
  7. data/Makefile +4 -0
  8. data/README.md +61 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/evoc +3 -0
  12. data/bin/setup +7 -0
  13. data/evoc.gemspec +30 -0
  14. data/lib/evoc/algorithm.rb +147 -0
  15. data/lib/evoc/algorithms/top_k.rb +86 -0
  16. data/lib/evoc/analyze.rb +395 -0
  17. data/lib/evoc/array.rb +43 -0
  18. data/lib/evoc/evaluate.rb +109 -0
  19. data/lib/evoc/exceptions/aggregation_error.rb +6 -0
  20. data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
  21. data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
  22. data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
  23. data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
  24. data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
  25. data/lib/evoc/exceptions/no_result.rb +6 -0
  26. data/lib/evoc/exceptions/non_finite.rb +8 -0
  27. data/lib/evoc/exceptions/non_numeric.rb +8 -0
  28. data/lib/evoc/exceptions/not_a_query.rb +6 -0
  29. data/lib/evoc/exceptions/not_a_result.rb +6 -0
  30. data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
  31. data/lib/evoc/exceptions/not_initialized.rb +6 -0
  32. data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
  33. data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
  34. data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
  35. data/lib/evoc/experiment.rb +239 -0
  36. data/lib/evoc/hash.rb +56 -0
  37. data/lib/evoc/history_store.rb +53 -0
  38. data/lib/evoc/hyper_rule.rb +53 -0
  39. data/lib/evoc/interestingness_measure.rb +77 -0
  40. data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
  41. data/lib/evoc/interestingness_measures.rb +882 -0
  42. data/lib/evoc/logger.rb +34 -0
  43. data/lib/evoc/memory_profiler.rb +43 -0
  44. data/lib/evoc/recommendation_cache.rb +152 -0
  45. data/lib/evoc/rule.rb +32 -0
  46. data/lib/evoc/rule_store.rb +340 -0
  47. data/lib/evoc/scenario.rb +303 -0
  48. data/lib/evoc/svd.rb +124 -0
  49. data/lib/evoc/tx.rb +34 -0
  50. data/lib/evoc/tx_store.rb +379 -0
  51. data/lib/evoc/version.rb +3 -0
  52. data/lib/evoc.rb +4 -0
  53. data/lib/evoc_cli/analyze.rb +198 -0
  54. data/lib/evoc_cli/cli_helper.rb +1 -0
  55. data/lib/evoc_cli/experiment.rb +78 -0
  56. data/lib/evoc_cli/info.rb +22 -0
  57. data/lib/evoc_cli/main.rb +29 -0
  58. data/lib/evoc_cli/util.rb +36 -0
  59. data/lib/evoc_helper.rb +40 -0
  60. data/mem_profiler/Gemfile.lock +39 -0
  61. data/mem_profiler/README.md +126 -0
  62. data/mem_profiler/createdb.rb +4 -0
  63. data/mem_profiler/db.rb +82 -0
  64. data/mem_profiler/gemfile +6 -0
  65. data/mem_profiler/gencsv.rb +64 -0
  66. data/mem_profiler/genimport.sh +8 -0
  67. data/mem_profiler/graph.rb +91 -0
  68. metadata +251 -0
@@ -0,0 +1,395 @@
1
+ module Evoc
2
+ class Analyze
3
+
4
+ attr_accessor :opts, :tx_store
5
+
6
# Stores the options, applies the configured log level and selects the
# transaction store to analyze.
#
# @param opts [Hash] reads :logger_level and :tx_store; when :tx_store is
#   nil a TxStore is built from :transactions, :case_id and :granularity
def initialize(opts)
  self.opts = opts
  Logging.set_level(opts[:logger_level])
  supplied_store = opts[:tx_store]
  self.tx_store =
    if supplied_store.nil?
      TxStore.new(path: opts[:transactions], case_id: opts[:case_id], granularity: opts[:granularity])
    else
      supplied_store
    end
end
15
+
16
##
# Runs every numerical analysis listed below and prints two CSV rows:
# the analysis names as a header, followed by the corresponding results.
#
# Prints to stdout (through Kernel#CSV)
def all
  analyses = %W(num_commits percent_method_changes_of_all_changes avg_changes_per_file avg_method_changes_per_parsable_file num_unique_changes average_changes_per_commit average_commits_per_specific_change time_span_in_years average_time_between_commits_in_minutes)

  CSV { |csv| csv << analyses }
  # each name is resolved to the method of the same name on this object
  figures = analyses.map { |name| method(name).call }
  CSV { |csv| csv << figures }
end
30
+
31
##
# Average number of method-level changes per changed file, counting only
# files (per transaction) that have at least one method-level change.
#
# The file of an item is the part of its name before the first colon. An
# item is a method-level change when its name contains a colon that is not
# immediately followed by "@residuals".
#
# @return [Float] the average, rounded to two decimals
def avg_method_changes_per_parsable_file
  file_of = /^(?<file>[^:]+?)(?::|$)/
  method_level = /:(?!@residuals)/
  parsable_files = 0
  method_change_total = 0
  tx_store.each do |tx|
    names = tx.items.map { |id| tx_store.int_2_name[id] }
    # group the changed items of this transaction by file name
    names.group_by { |name| file_of.match(name)[:file] }.each_value do |changes|
      hits = changes.count { |change| change =~ method_level }
      # files without any method-level change are not "parsable"
      next if hits.zero?
      parsable_files += 1
      method_change_total += hits
    end
  end
  (method_change_total.to_f / parsable_files).round(2)
end
52
+
53
##
# Average number of changed items per changed file, where files are counted
# once per transaction and the file of an item is the part of its name
# before the first colon.
#
# @return [Float] the average, rounded to two decimals
def avg_changes_per_file
  file_of = /^(?<file>[^:]+?)(?::|$)/
  file_count = 0
  change_count = 0
  tx_store.each do |tx|
    names = tx.items.map { |id| tx_store.int_2_name[id] }
    by_file = names.group_by { |name| file_of.match(name)[:file] }
    file_count += by_file.size
    change_count += by_file.values.sum(&:size)
  end
  (change_count.to_f / file_count).round(2)
end
65
+
66
##
# Share of all changed items (over every transaction) that are method
# level. An item is treated as file level when its name has no colon at
# all, or ends in ":@residuals"; everything else counts as method level.
#
# @return [Float] the percentage of changes that are method level,
#   rounded to two decimals
def percent_method_changes_of_all_changes
  file_level = /^[^:]+?(\.[^:\/]+?)?(?::@residuals$|$)/
  names = tx_store.flat_map do |tx|
    tx.items.map { |item| tx_store.int_2_name[item] }
  end
  file_level_count = names.count { |name| name =~ file_level }
  ((1 - file_level_count.to_f / names.size) * 100).round(2)
end
81
+
82
##
# @return the size reported by the transaction store, i.e. the number of
#   transactions (commits) it holds
def num_commits
  store = tx_store
  store.size
end
85
+
86
##
# Average number of transactions each distinct item takes part in.
# Reads tx_store.items, which is used here as a mapping from item to the
# collection of its transactions.
#
# @return [Float] the average, rounded to two decimals
def average_commits_per_specific_change
  item_index = tx_store.items
  occurrences = item_index.values.sum(&:size)
  (occurrences.to_f / item_index.size).round(2)
end
89
+
90
##
# Prints a CSV formatted table of the top N most frequently changed items,
# most frequent first. N is configured in opts[:top]; the frequency of an
# item is the number of transactions recorded for it in tx_store.items.
def file_frequency
  CSV { |csv| csv << %W(file frequency) } # print header
  ranked = tx_store.items
                   .map { |item, txes| [item, txes.size] }
                   .sort_by { |_item, count| -count }
  ranked.take(opts[:top]).each do |item, count|
    # items are stored as integer ids; translate back to the name
    name = tx_store.int_2_name[item]
    CSV { |csv| csv << [name, count] }
  end
end
103
+
104
##
# Dumps the commit sizes as CSV on $stdout.
#
# With opts[:group] set, prints one "size,frequency" row per distinct
# commit size (ascending by size). Otherwise prints one commit size per
# line, in store order, while reporting progress on $stderr.
#
# All data rows go through $stdout (never the STDOUT constant) so that the
# header and the rows end up in the same stream when $stdout is redirected.
def commit_size
  if opts[:group]
    $stdout.puts 'commit_size,frequency'
    tx_store.group_by(&:size).sort.each do |size, txes|
      $stdout.puts "#{size},#{txes.size}"
    end
  else
    $stdout.puts 'commit_size'
    total = tx_store.size
    tx_store.each_with_index do |tx, index|
      # "\r" keeps the progress indicator on a single terminal line
      $stderr.print "Dumping commit sizes: #{index + 1} of #{total} \r"
      $stdout.puts tx.size
    end
  end
  $stderr.puts "DONE "
end
121
+
122
##
# @return [Integer] the number of distinct items found across the item
#   lists of all transactions
def num_unique_changes
  item_lists = tx_store.map { |tx| tx.items }
  item_lists.flatten.uniq.size
end
125
+
126
##
# @return [Float] the mean transaction size, rounded to two decimals
def average_changes_per_commit
  sizes = tx_store.map { |tx| tx.size }
  total = sizes.sum
  (total.to_f / sizes.size).round(2)
end
130
+
131
# added but not used as the data is not interesting --LM
#
# Median transaction size: the middle element of the sorted sizes, or the
# mean of the two middle elements when the number of transactions is even.
#
# @return [Float] the median size
def median_changes_per_commit
  ordered = tx_store.map(&:size).sort
  count = ordered.length
  lower_middle = ordered[(count - 1) / 2]
  upper_middle = ordered[count / 2]
  (lower_middle + upper_middle) / 2.0
end
138
+
139
##
# Convenience wrapper: the history's time span with the unit fixed to
# 'years' (see #time_span).
def time_span_in_years
  span_unit = 'years'
  time_span(unit: span_unit)
end
142
+
143
##
# Convenience wrapper: the average time between commits with the unit
# fixed to 'minutes', rounded to two decimals.
def average_time_between_commits_in_minutes
  minutes = average_time_between_commits(unit: 'minutes')
  minutes.round(2)
end
146
+
147
+
148
##
# For every transaction, forms all queries that leave out exactly one of
# its items (combinations of size tx.size - 1) and counts how many
# transactions of the history subset clone_with_subset(0, tx.index) are
# returned by transactions_of_list(query, true).
#
# @return [Hash] maps a hit count to the number of queries that produced it
def uniqueness
  hit_histogram = {}

  tx_store.each do |tx|
    queries = tx.items.combination(tx.size - 1).to_a
    history = tx_store.clone_with_subset(0, tx.index)

    queries.each do |query|
      hits = history.transactions_of_list(query, true).size
      hit_histogram[hits] = hit_histogram.fetch(hits, 0) + 1
    end
  end

  hit_histogram
end
169
+
170
##
# Prints a CSV table with one row per randomly generated rule and one
# column per measure (Evoc::Rule.p_measures + Evoc::Rule.measures, sorted).
# opts[:number] rules are generated.
#
# Each rule is drawn from a random transaction with more than one item:
# a random non-empty proper subset of its items becomes the lhs and one of
# the remaining items the rhs, so every rule has support of at least 1.
def measure_values
  candidates = tx_store.select { |tx| tx.size > 1 }
  measures = (Evoc::Rule.p_measures + Evoc::Rule.measures).sort
  CSV { |csv| csv << measures } # print header
  opts[:number].times do
    tx = candidates.sample
    lhs = tx.items.sample(rand(1..tx.size - 1))
    rhs = (tx.items - lhs).sample
    rule = Evoc::Rule.new(lhs: lhs, rhs: rhs, tx_store: tx_store)
    row = CSV::Row.new([], [], false)
    measures.each do |name|
      measure = rule.get_measure(name)
      # measures may come back wrapped in an InterestingnessMeasure or raw
      value = measure.is_a?(Evoc::InterestingnessMeasure) ? measure.value : measure
      row << (value ? value.to_f : nil)
    end
    CSV { |csv| csv << row } # print row
  end
end
192
+
193
##
# Prints, as CSV, the smallest and largest value every measure takes over a
# fixed set of boundary probability configurations, together with the
# configuration (P(A), P(B), P(A,B)) that produced each extreme.
#
# Columns: measure, current_range, min, max, then P(A), P(B), P(A,B) of the
# minimum and of the maximum. current_range is taken from
# Evoc::InterestingnessMeasures.get_min/get_max when the rule returns an
# InterestingnessMeasure, and is "[0,1]" otherwise.
#
# Values that are nil/false or NaN are ignored; a measure without any
# usable value produces no row.
#
# (An earlier exhaustive sweep over P(A), P(B) and P(A,B) in 0.01 steps was
# replaced by the hand-picked configurations below.)
def measure_ranges
  # [P(A), P(B), P(A,B)]
  configurations = [
    # --- A and B never change together ---
    [1.to_r / 4, 1.to_r / 4, 0],                 # both change a quarter of the time
    [1.to_r / 1000000, 1.to_r / 2, 0],           # A almost never changes
    [1.to_r / 2, 1.to_r / 1000000, 0],           # B almost never changes
    [1.to_r / 2, 1.to_r / 2, 0],                 # both change half of the time
    [1.to_r / 1000000, 1.to_r / 1000000, 0],     # both almost never change
    # --- A and B only ever change together ---
    [1.to_r / 3, 1.to_r / 3, 1.to_r / 3],        # a third of the time
    [1.to_r / 1000000, 1.to_r / 1000000, 1.to_r / 1000000], # rarely
    [1, 1, 1],                                   # always
    # --- one side always changes ---
    [1, 1.to_r / 10000000, 1.to_r / 10000000],   # A always, B rarely
    [1.to_r / 10000000, 1, 1.to_r / 10000000],   # B always, A rarely
    [1.to_r / 2, 1, 1.to_r / 2],                 # B always, A half of the time
    [1, 1.to_r / 2, 1.to_r / 2],                 # A always, B half of the time
    # --- A and B only ever change together (float derived probabilities) ---
    [0.99999999.rationalize, 0.99999999.rationalize, 0.99999999.rationalize], # almost always
    [0.5.rationalize, 0.5.rationalize, 0.5.rationalize],                      # half of the time
    # --- partial overlap ---
    [0.4.rationalize, 0.4.rationalize, 0.1.rationalize],
    [1.to_r / 3, 1.to_r / 2, 1.to_r / 4]
  ]

  rules = configurations.map do |p_a, p_b, p_ab|
    rule = Evoc::Rule.new(lhs: [], rhs: [])
    rule.set_p('p_A', p_a)
    rule.set_p('p_B', p_b)
    rule.set_p('p_AB', p_ab)
    rule
  end

  measures = (Evoc::Rule.p_measures + Evoc::Rule.measures).sort
  CSV { |csv| csv << [nil, nil, nil, nil, 'min_configuration', nil, nil, 'max_configuration', nil, nil] }
  CSV { |csv| csv << ['measure', 'current_range', 'min', 'max', 'P(A)', 'P(B)', 'P(A,B)', 'P(A)', 'P(B)', 'P(A,B)'] } # print header
  measures.each do |m|
    observed = {}
    rules.each do |rule|
      measure = rule.get_measure(m)
      value = measure.is_a?(Evoc::InterestingnessMeasure) ? measure.value : measure
      next unless value
      numeric = value.to_f
      # NaN never equals anything (not even itself), so it has to be
      # detected with nan? rather than a != comparison
      next if numeric.nan?
      a = rule.p_A
      b = rule.p_B
      ab = rule.p_AB
      observed["#{a},#{b},#{ab}"] = { m: measure, v: numeric, a: a, b: b, ab: ab }
    end
    # nothing comparable was produced for this measure
    next if observed.empty?

    min = observed.values.min_by { |entry| entry[:v] }
    max = observed.values.max_by { |entry| entry[:v] }
    current_range =
      if min[:m].is_a?(Evoc::InterestingnessMeasure)
        "[#{Evoc::InterestingnessMeasures.get_min(m)},#{Evoc::InterestingnessMeasures.get_max(m)}]"
      else
        "[0,1]"
      end
    CSV { |csv| csv << [m, current_range, min[:v], max[:v], min[:a], min[:b], min[:ab], max[:a], max[:b], max[:ab]] } # print row
  end
end
349
+
350
##
# Prints, as CSV on $stdout, the cg, dcg and dcg2 values of
# Evoc::InterestingnessMeasureAggregator for randomly generated,
# descending value lists of every length from 1 to max_length.
#
# Two lists are generated per length: one with values drawn as
# 1 - rand(0.00001..1) (rows tagged "0..1", aggregated as 'm_support') and
# one with integers from rand(0..10) (rows tagged inf, aggregated as
# 'm_hyper_coefficient').
#
# @param max_length [Integer] the longest list to generate; defaults to the
#   previously hard-coded 1000
def aggregator_range(max_length: 1000)
  $stdout.puts 'aggregator,length,range,y'
  (1..max_length).each do |length|
    bounded = Array.new(length) { 1 - rand(0.00001..1) }.sort.reverse
    unbounded = Array.new(length) { rand(0..10) }.sort.reverse
    aggregator = Evoc::InterestingnessMeasureAggregator.new('m_support', bounded)
    aggregator_inf = Evoc::InterestingnessMeasureAggregator.new('m_hyper_coefficient', unbounded)
    $stdout.puts "CG,#{length},\"0..1\",#{aggregator.cg}"
    $stdout.puts "DCG,#{length},\"0..1\",#{aggregator.dcg}"
    $stdout.puts "DCG2,#{length},\"0..1\",#{aggregator.dcg2}"
    $stdout.puts "CG,#{length},inf,#{aggregator_inf.cg}"
    $stdout.puts "DCG,#{length},inf,#{aggregator_inf.dcg}"
    $stdout.puts "DCG2,#{length},inf,#{aggregator_inf.dcg2}"
  end
end
367
+
368
##
# Prints the id => name dictionary of the transaction store as CSV
# ("id,name" header followed by one "id,name" line per entry).
def create_dict
  puts "id,name"
  dictionary = tx_store.int_2_name
  dictionary.each do |id, name|
    puts "#{id},#{name}"
  end
end
374
+
375
+ private
376
+
377
##
# Time between the dates of the first and the last transaction in the
# store, as reported by TimeDifference.
#
# @param unit [String] suffix of the TimeDifference reader to call, i.e.
#   'years' calls in_years
def time_span(unit: 'years')
  reader = 'in_' + unit
  difference = TimeDifference.between(tx_store.first.date, tx_store.last.date)
  difference.method(reader).call
end
382
+
383
##
# Average time between consecutive transactions: the time between the
# dates of the first and last transaction divided by the number of gaps
# (size - 1).
#
# @param unit [String] suffix of the TimeDifference reader to call, i.e.
#   'hours' calls in_hours
# @return [Float] the average gap in the requested unit
# @raise [RuntimeError] when the store holds fewer than two transactions.
#   A StandardError subclass is raised (rather than a bare Exception) so a
#   plain `rescue` can handle it; `rescue Exception` still catches it too.
def average_time_between_commits(unit: 'hours')
  commits = tx_store.size
  raise "History only contained 1 or 0 transactions" unless commits > 1

  first_date = tx_store.first.date
  last_date = tx_store.last.date
  total_time = TimeDifference.between(first_date, last_date).method('in_' + unit).call
  total_time.to_f / (commits - 1)
end
393
+ end #class
394
+ end #module
395
+
data/lib/evoc/array.rb ADDED
@@ -0,0 +1,43 @@
1
# Set-like convenience helpers added to the core Array class.
class Array
  ##
  # @param other [Array]
  # @return [Boolean] true when every element of self also occurs in other
  #   (duplicates in self do not matter)
  def subset?(other)
    (self - other).empty?
  end

  ##
  # @param other [Array]
  # @return [Boolean] true when self and other share at least one element
  def include_any?(other)
    !(self & other).empty?
  end

  ##
  # returns the union of an array of arrays ([] when self is empty)
  def array_union
    inject(:|) || []
  end

  ##
  # returns the intersection of a list of lists ([] when self is empty)
  def array_intersection
    inject(:&) || []
  end

  ##
  # returns the list of items in the lists of self that were not in other
  def array_difference(other)
    map { |list| list - other }.array_union
  end

  ##
  # Builds all subsets of the given list without modifying it.
  #
  # @param set [Array]
  # @return [Array<Array>] every subset of set, starting with the empty one
  def self.powerset(set)
    return [[]] if set.empty?

    last = set.last
    # recurse on a copy without the last element instead of popping it off
    # the caller's array
    without_last = powerset(set[0...-1])
    without_last | without_last.map { |subset| subset | [last] }
  end

end
@@ -0,0 +1,109 @@
1
+ module Evoc
2
+ class Evaluate
3
+ extend Logging
4
+
5
+
6
##
# Dispatches to the evaluator named by +evaluator+.
#
# Names containing "average_precision" select average_precision; trailing
# digits (e.g. "average_precision10") are passed on as the cut-off n.
# Names containing "top10_recall" select top10_recall.
#
# @param [Array] recommendation passed through to the evaluator
# @param [Array] expected_outcome passed through to the evaluator
# @param [String] evaluator the name of the evaluator to run
# @raise [ArgumentError] when the name matches no implemented evaluator
def self.execute(recommendation, expected_outcome, evaluator)
  precision_request = /average_precision(?<num>\d+)?/.match(evaluator)
  if precision_request
    cutoff = precision_request[:num]
    return self.average_precision(recommendation, expected_outcome) if cutoff.nil?
    return self.average_precision(recommendation, expected_outcome, n: cutoff.to_i)
  end
  return self.top10_recall(recommendation, expected_outcome) if /top10_recall/.match(evaluator)

  raise ArgumentError, "The evaluator you requested (#{evaluator}) has not been implemented in Evoc::Evaluate"
end
18
+
19
# calculate the share of the expected items that show up among the first
# 10 entries of the recommendation (entries may themselves be arrays; they
# are flattened)
# @param [Array] recommendation a sorted array
# @param [Array] expected_outcome an array of items (a single item is wrapped)
# @return [Rational, nil] the top10 recall, or nil when either input is empty
def self.top10_recall(recommendation, expected_outcome)
  expected_outcome = [expected_outcome] unless expected_outcome.is_a?(Array)
  has_expected = expected_outcome.size > 0
  has_recommendation = !recommendation.empty?
  return nil unless has_expected && has_recommendation

  leading_items = recommendation.take(10).flatten
  hits = (expected_outcome & leading_items).size
  hits.to_r / expected_outcome.size
end
33
+
34
##
# calculate the average precision of the result based on an expected outcome
#
# The first n entries of the recommendation are walked in order. Entries
# that add no item not seen before are skipped. Whenever an entry
# contains expected items, precision at that point (correct items found
# so far / items considered so far) is weighted by the gain in recall.
#
# @param [Array] recommendation a sorted array (entries may be arrays)
# @param [Array] expected_outcome an array of items (a single item is wrapped)
# @param [Integer] n how many entries to consider, defaults to all of them
# @return [Float, nil] the average precision, nil when either input is empty
# @raise [ArgumentError] when the first item of an entry has a different
#   class than the first expected item
def self.average_precision(recommendation, expected_outcome, n: recommendation.size)
  expected_outcome = [expected_outcome] unless expected_outcome.is_a?(Array)
  has_expected = expected_outcome.size > 0
  has_recommendation = !recommendation.empty?
  return nil unless has_expected && has_recommendation

  running_total = 0
  found = []
  seen = []
  recommendation.take(n).each do |entry|
    entry = [entry] unless entry.is_a?(Array)
    unless entry.first.class == expected_outcome.first.class
      raise ArgumentError, "Expected outcome was of type #{expected_outcome.first.class}, while the item in the recommendation was of type #{entry.first.class}"
    end
    # skip already considered items
    fresh = entry - seen
    next if fresh.empty?
    seen.concat(fresh)

    relevant = entry & expected_outcome
    next if relevant.empty?
    # make sure that the new items havent already been added earlier
    newly_found = relevant - found
    found.concat(newly_found)
    recall_gain = newly_found.size.to_r / expected_outcome.size
    precision_here = found.size.to_r / seen.size
    running_total += precision_here * recall_gain
  end
  running_total.to_f
end
74
+
75
# calculate the grouped average precision of the result based on an expected outcome
#
# Rules are grouped by weight and the groups are visited from the highest
# weight down. Every group contributes the union of its rules' rhs items
# as one step of an average precision calculation.
#
# The result is stored through self.collected_average_precision= (nil when
# the expected outcome or the receiver is empty).
#
# NOTE(review): this method calls empty?, group_by and
# collected_average_precision= on self, which for a class-level method is
# the class itself; none of these are defined in the visible source. It
# looks like it was written for a collection of rules - confirm before use.
def self.e_collected_average_precision(expected_outcome)
  expected_outcome = [expected_outcome] unless expected_outcome.is_a?(Array)
  has_expected = expected_outcome.size > 0
  has_rules = !self.empty?
  return self.collected_average_precision = nil unless has_expected && has_rules

  running_total = 0
  found = []
  seen = []
  # group rules with equal weights, then order the groups by weight, highest first
  weight_groups = self.group_by { |rule| rule.weight }.sort.reverse
  weight_groups.each do |_weight, rules|
    items = rules.map(&:rhs).flatten.uniq
    fresh = items - seen
    next if fresh.empty?
    seen.concat(fresh)

    relevant = items & expected_outcome
    next if relevant.empty?
    # make sure that the new items havent already been added earlier
    newly_found = relevant - found
    found.concat(newly_found)
    recall_gain = newly_found.size.to_r / expected_outcome.size
    precision_here = found.size.to_r / seen.size
    running_total += precision_here * recall_gain
  end
  self.collected_average_precision = running_total.to_f
end
108
+ end
109
+ end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals a failed aggregation - raise sites are not visible here.
    class AggregationError < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals a nil or empty expected outcome - raise sites are not
    # visible here.
    class ExpectedOutcomeNilorEmpty < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals a failed measure calculation - raise sites are not visible
    # here.
    class MeasureCalculationError < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals that a set of changes held no changed items - raise sites
    # are not visible here.
    class NoChangedItemsInChanges < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals a JSON object without changes - raise sites are not visible
    # here.
    class NoChangesInJsonObject < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals a JSON object without a date - raise sites are not visible
    # here.
    class NoDateInJsonObject < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals a missing result - raise sites are not visible here.
    class NoResult < StandardError; end
  end
end
@@ -0,0 +1,8 @@
1
module Evoc
  module Exceptions
    module InterestingnessMeasure
      # Plain StandardError subclass with no added behavior. Presumably
      # signals a measure value that is not finite - raise sites are not
      # visible here.
      class NonFinite < StandardError; end
    end
  end
end
@@ -0,0 +1,8 @@
1
module Evoc
  module Exceptions
    module InterestingnessMeasure
      # Plain StandardError subclass with no added behavior. Presumably
      # signals a measure value that is not numeric - raise sites are not
      # visible here.
      class NonNumeric < StandardError; end
    end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals that a value was not a query - raise sites are not visible
    # here.
    class NotAQuery < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals that a value was not a result - raise sites are not visible
    # here.
    class NotAResult < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals that a value was not a transaction - raise sites are not
    # visible here.
    class NotATransaction < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals use of something that was not initialized - raise sites are
    # not visible here.
    class NotInitialized < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals that a set of changes held only nil entries - raise sites
    # are not visible here.
    class OnlyNilInChanges < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals a nil or empty query - raise sites are not visible here.
    class QueryNilOrEmpty < StandardError; end
  end
end
@@ -0,0 +1,6 @@
1
module Evoc
  module Exceptions
    # Plain StandardError subclass with no added behavior. Presumably
    # signals that JSON could not be converted into a transaction - raise
    # sites are not visible here.
    class UnableToConvertJsonToTx < StandardError; end
  end
end