evoc 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/Makefile +4 -0
- data/README.md +61 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/evoc +3 -0
- data/bin/setup +7 -0
- data/evoc.gemspec +30 -0
- data/lib/evoc/algorithm.rb +147 -0
- data/lib/evoc/algorithms/top_k.rb +86 -0
- data/lib/evoc/analyze.rb +395 -0
- data/lib/evoc/array.rb +43 -0
- data/lib/evoc/evaluate.rb +109 -0
- data/lib/evoc/exceptions/aggregation_error.rb +6 -0
- data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
- data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
- data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_result.rb +6 -0
- data/lib/evoc/exceptions/non_finite.rb +8 -0
- data/lib/evoc/exceptions/non_numeric.rb +8 -0
- data/lib/evoc/exceptions/not_a_query.rb +6 -0
- data/lib/evoc/exceptions/not_a_result.rb +6 -0
- data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
- data/lib/evoc/exceptions/not_initialized.rb +6 -0
- data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
- data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
- data/lib/evoc/experiment.rb +239 -0
- data/lib/evoc/hash.rb +56 -0
- data/lib/evoc/history_store.rb +53 -0
- data/lib/evoc/hyper_rule.rb +53 -0
- data/lib/evoc/interestingness_measure.rb +77 -0
- data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
- data/lib/evoc/interestingness_measures.rb +882 -0
- data/lib/evoc/logger.rb +34 -0
- data/lib/evoc/memory_profiler.rb +43 -0
- data/lib/evoc/recommendation_cache.rb +152 -0
- data/lib/evoc/rule.rb +32 -0
- data/lib/evoc/rule_store.rb +340 -0
- data/lib/evoc/scenario.rb +303 -0
- data/lib/evoc/svd.rb +124 -0
- data/lib/evoc/tx.rb +34 -0
- data/lib/evoc/tx_store.rb +379 -0
- data/lib/evoc/version.rb +3 -0
- data/lib/evoc.rb +4 -0
- data/lib/evoc_cli/analyze.rb +198 -0
- data/lib/evoc_cli/cli_helper.rb +1 -0
- data/lib/evoc_cli/experiment.rb +78 -0
- data/lib/evoc_cli/info.rb +22 -0
- data/lib/evoc_cli/main.rb +29 -0
- data/lib/evoc_cli/util.rb +36 -0
- data/lib/evoc_helper.rb +40 -0
- data/mem_profiler/Gemfile.lock +39 -0
- data/mem_profiler/README.md +126 -0
- data/mem_profiler/createdb.rb +4 -0
- data/mem_profiler/db.rb +82 -0
- data/mem_profiler/gemfile +6 -0
- data/mem_profiler/gencsv.rb +64 -0
- data/mem_profiler/genimport.sh +8 -0
- data/mem_profiler/graph.rb +91 -0
- metadata +251 -0
data/lib/evoc/analyze.rb
ADDED
@@ -0,0 +1,395 @@
module Evoc
  class Analyze

    attr_accessor :opts, :tx_store

    def initialize(opts)
      self.opts = opts
      Logging.set_level(self.opts[:logger_level])
      if opts[:tx_store].nil?
        self.tx_store = TxStore.new(path: opts[:transactions],case_id: self.opts[:case_id], granularity: self.opts[:granularity])
      else
        self.tx_store = opts[:tx_store]
      end
    end

    ##
    # Perform all of the numerical analyzes
    #
    # Prints to stdout
    def all
      methods = %W(num_commits percent_method_changes_of_all_changes avg_changes_per_file avg_method_changes_per_parsable_file num_unique_changes average_changes_per_commit average_commits_per_specific_change time_span_in_years average_time_between_commits_in_minutes)

      CSV {|row| row << methods}
      results = []
      methods.each do |m|
        results << self.method(m).call
      end
      CSV {|row| row << results}
    end

    def avg_method_changes_per_parsable_file
      parsable_files_changed = 0
      method_changes = 0
      self.tx_store.each do |tx|
        named_items = tx.items.map {|i| self.tx_store.int_2_name[i]}
        # group changed items by file name
        named_items.group_by {|i| /^(?<file>[^:]+?)(?::|$)/.match(i)[:file]}.each do |file,changes|
          # check if any method changes were found for this file
          if changes.any? {|c| c =~ /:(?!@residuals)/}
            parsable_files_changed += 1
            # count number of method changes per file group
            changes.each do |change|
              if change =~ /:(?!@residuals)/
                method_changes += 1
              end
            end
          end
        end
      end
      return (method_changes.to_f/parsable_files_changed).round(2)
    end

    def avg_changes_per_file
      files_changed = 0
      total_changes = 0
      self.tx_store.each do |tx|
        named_items = tx.items.map {|i| self.tx_store.int_2_name[i]}
        named_items.group_by {|i| /^(?<file>[^:]+?)(?::|$)/.match(i)[:file]}.each do |file,changes|
          files_changed += 1
          total_changes += changes.size
        end
      end
      return (total_changes.to_f/files_changed).round(2)
    end

    ##
    # @return [Float] the percentage of changes that are method level
    def percent_method_changes_of_all_changes
      total_changes = 0
      file_changes = 0
      self.tx_store.each do |tx|
        tx.items.each do |item|
          total_changes += 1
          if self.tx_store.int_2_name[item] =~ /^[^:]+?(\.[^:\/]+?)?(?::@residuals$|$)/
            file_changes += 1
          end
        end
      end
      return ((1 - file_changes.to_f/total_changes)*100).round(2)
    end

    def num_commits
      self.tx_store.size
    end

    def average_commits_per_specific_change
      (self.tx_store.items.values.inject(0) {|sum,txes| sum + txes.size}.to_f/self.tx_store.items.size).round(2)
    end

    ##
    # Prints a CSV formated table of the top N frequent files
    # N is configured in opts[:top]
    def file_frequency
      # print header
      CSV {|row| row << %W(file frequency)}
      frequency = self.tx_store.items.map {|item,txes| [item, txes.size] }
      frequency.sort_by! {|item,freq| -freq}
      frequency.take(self.opts[:top]).each do |file,freq|
        filename = self.tx_store.int_2_name[file]
        CSV {|row| row << [filename,freq]}
      end
    end

    ##
    # Dumps the commit sizes, one commit per line
    def commit_size
      if self.opts[:group]
        $stdout.puts 'commit_size,frequency'
        self.tx_store.group_by(&:size).sort.each do |size,txes|
          STDOUT.puts "#{size},#{txes.size}"
        end
      else
        $stdout.puts 'commit_size'
        self.tx_store.each_with_index do |tx,index|
          $stderr.print "Dumping commit sizes: #{index+1} of #{self.tx_store.size} \r"
          $stdout.puts tx.size
        end
      end
      $stderr.puts "DONE "
    end

    def num_unique_changes
      self.tx_store.map(&:items).flatten.uniq.size
    end

    def average_changes_per_commit
      arr = self.tx_store.map(&:size)
      (arr.inject{ |sum, el| sum + el }.to_f / arr.size).round(2)
    end

    # added but not used as the data is not interesting --LM
    def median_changes_per_commit
      arr = self.tx_store.map(&:size)
      sorted = arr.sort
      len = sorted.length
      (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0
    end

    def time_span_in_years
      time_span(unit: 'years')
    end

    def average_time_between_commits_in_minutes
      average_time_between_commits(unit: 'minutes').round(2)
    end


    def uniqueness
      result = Hash.new

      self.tx_store.each do |tx|
        query_size = tx.size-1
        queries = tx.items.combination(query_size).to_a
        previous_history = self.tx_store.clone_with_subset(0,tx.index)

        queries.each do |query|
          hits = previous_history.transactions_of_list(query,true).size
          if result[hits].nil?
            result[hits] = 1
          else
            result[hits] += 1
          end
        end

      end

      result
    end

    def measure_values
      # generate some random rules with min sup 1
      samplable = self.tx_store.select {|tx| tx.size > 1}
      measures = (Evoc::Rule.p_measures + Evoc::Rule.measures).sort
      CSV {|r| r << measures} # print header
      self.opts[:number].times do |i|
        random_tx = samplable.sample
        lhs = random_tx.items.sample(rand(1..random_tx.size-1))
        rhs = (random_tx.items-lhs).sample
        r = Evoc::Rule.new(lhs: lhs, rhs: rhs, tx_store: self.tx_store)
        row = CSV::Row.new([],[],false)
        measures.each do |m|
          measure = r.get_measure(m)
          if value = measure.is_a?(Evoc::InterestingnessMeasure) ? measure.value : measure
            row << value.to_f
          else
            row << nil
          end
        end
        CSV {|r| r << row} # print row
      end
    end

    def measure_ranges
      #rules = []
      #ab_range = ([0.00000001]+(0.0..1).step(0.01).to_a+[0.99999999]).map(&:rationalize)
      #ab_range.each do |ab|
      # ab_zero = ((ab == 0) ? 0.0000001 : 0)
      # a_range = (ab+ab_zero..1).step(0.01).map(&:rationalize)
      # b_range = (ab+ab_zero..1).step(0.01).map(&:rationalize)
      # # construct all rules starting from A
      # a_range.each do |a|
      # new_b_range = (ab..1-a+ab).step(0.01).map(&:rationalize)
      # new_b_range.each do |b|
      # if (a == 0) | (b == 0)
      # # a or b cant be 0
      # next
      # else
      # r = Evoc::Rule.new(lhs: [], rhs: [])
      # r.set_p('p_A',a)
      # r.set_p('p_B',b)
      # r.set_p('p_AB',ab)
      # rules << r
      # end
      # end
      # end
      # # construct all rules starting from B
      # b_range.each do |b|
      # new_a_range = (ab..1-b+ab).step(0.01).map(&:rationalize)
      # new_a_range.each do |a|
      # r = Evoc::Rule.new(lhs: [], rhs: [])
      # r.set_p('p_A',a)
      # r.set_p('p_B',b)
      # r.set_p('p_AB',ab)
      # rules << r
      # end
      # end
      #end
      ## A and B never change together, but do change
      r1 = Evoc::Rule.new(lhs: [], rhs: [])
      r1.set_p('p_A',1.to_r/4)
      r1.set_p('p_B',1.to_r/4)
      r1.set_p('p_AB',0)
      # A and B never change together, and A almost never change
      r2 = Evoc::Rule.new(lhs: [], rhs: [])
      r2.set_p('p_A',1.to_r/1000000)
      r2.set_p('p_B',1.to_r/2)
      r2.set_p('p_AB',0)
      # A and B never change together, and B almost never change
      r3 = Evoc::Rule.new(lhs: [], rhs: [])
      r3.set_p('p_A',1.to_r/2)
      r3.set_p('p_B',1.to_r/1000000)
      r3.set_p('p_AB',0)
      # A and B never change together, but change half of the time
      r4 = Evoc::Rule.new(lhs: [], rhs: [])
      r4.set_p('p_A',1.to_r/2)
      r4.set_p('p_B',1.to_r/2)
      r4.set_p('p_AB',0)

      # A and B never change together, and A and B almost never change
      r5 = Evoc::Rule.new(lhs: [], rhs: [])
      r5.set_p('p_A',1.to_r/1000000)
      r5.set_p('p_B',1.to_r/1000000)
      r5.set_p('p_AB',0)

      # A and B always change together
      r6 = Evoc::Rule.new(lhs: [], rhs: [])
      r6.set_p('p_A',1.to_r/3)
      r6.set_p('p_B',1.to_r/3)
      r6.set_p('p_AB',1.to_r/3)
      # A and B always change together, but rarely change
      r7 = Evoc::Rule.new(lhs: [], rhs: [])
      r7.set_p('p_A',1.to_r/1000000)
      r7.set_p('p_B',1.to_r/1000000)
      r7.set_p('p_AB',1.to_r/1000000)
      # A and B always change together, always
      r8 = Evoc::Rule.new(lhs: [], rhs: [])
      r8.set_p('p_A',1)
      r8.set_p('p_B',1)
      r8.set_p('p_AB',1)

      # B always change when A change, but rarely change
      r9 = Evoc::Rule.new(lhs: [], rhs: [])
      r9.set_p('p_A',1)
      r9.set_p('p_B',1.to_r/10000000)
      r9.set_p('p_AB',1.to_r/10000000)

      # A always change when B change, but rarely change
      r10 = Evoc::Rule.new(lhs: [], rhs: [])
      r10.set_p('p_A',1.to_r/10000000)
      r10.set_p('p_B',1)
      r10.set_p('p_AB',1.to_r/10000000)

      # A and B sometimes change together, B always
      r11 = Evoc::Rule.new(lhs: [], rhs: [])
      r11.set_p('p_A',1.to_r/2)
      r11.set_p('p_B',1)
      r11.set_p('p_AB',1.to_r/2)

      # A and B sometimes change together, A always
      r12 = Evoc::Rule.new(lhs: [], rhs: [])
      r12.set_p('p_A',1)
      r12.set_p('p_B',1.to_r/2)
      r12.set_p('p_AB',1.to_r/2)

      # A and B always change together, almost always
      r13 = Evoc::Rule.new(lhs: [], rhs: [])
      r13.set_p('p_A',0.99999999.rationalize)
      r13.set_p('p_B',0.99999999.rationalize)
      r13.set_p('p_AB',0.99999999.rationalize)

      # A and B always change together, half of the time
      r14 = Evoc::Rule.new(lhs: [], rhs: [])
      r14.set_p('p_A',0.5.rationalize)
      r14.set_p('p_B',0.5.rationalize)
      r14.set_p('p_AB',0.5.rationalize)

      # A and B sometimes change together, A always
      r15 = Evoc::Rule.new(lhs: [], rhs: [])
      r15.set_p('p_A',0.4.rationalize)
      r15.set_p('p_B',0.4.rationalize)
      r15.set_p('p_AB',0.1.rationalize)

      # A and B sometimes change together, A always
      r16 = Evoc::Rule.new(lhs: [], rhs: [])
      r16.set_p('p_A',1.to_r/3)
      r16.set_p('p_B',1.to_r/2)
      r16.set_p('p_AB',1.to_r/4)

      rules = [r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,r13,r14,r15,r16]

      measures = (Evoc::Rule.p_measures + Evoc::Rule.measures).sort
      CSV {|r| r << [nil,nil,nil,nil,'min_configuration',nil,nil,'max_configuration',nil,nil]}
      CSV {|r| r << ['measure','current_range','min','max','P(A)','P(B)','P(A,B)','P(A)','P(B)','P(A,B)']} # print header
      measures.each do |m|
        values = Hash.new
        rules.each do |r|
          measure = r.get_measure(m)
          if value = (measure.is_a?(Evoc::InterestingnessMeasure) ? measure.value : measure)
            if value != Float::NAN
              a = r.p_A
              b = r.p_B
              ab = r.p_AB
              key = "#{a},#{b},#{ab}"
              values[key] = Hash.new
              values[key][:m] = measure
              values[key][:v] = value.to_f
              values[key][:a] = a
              values[key][:b] = b
              values[key][:ab] = ab
            end
          end
        end
        min = values.min_by {|k,v| v[:v]}
        max = values.max_by {|k,v| v[:v]}
        current_range = (min[1][:m].is_a?(Evoc::InterestingnessMeasure) ? "[#{Evoc::InterestingnessMeasures.get_min(m)},#{Evoc::InterestingnessMeasures.get_max(m)}]" : "[0,1]")
        CSV {|r| r << [m,current_range,min[1][:v],max[1][:v],min[1][:a],min[1][:b],min[1][:ab],max[1][:a],max[1][:b],max[1][:ab]]} # print row
      end
    end

    def aggregator_range
      r = Random.new
      $stdout.puts 'aggregator,length,range,y'
      (1..1000).each do |length|
        list = ([1]*length).map {|i| i - rand(0.00001..1)}.sort.reverse
        list_inf = ([1]*length).map {|i| rand(0..10)}.sort.reverse
        aggregator = Evoc::InterestingnessMeasureAggregator.new('m_support',list)
        aggregator_inf = Evoc::InterestingnessMeasureAggregator.new('m_hyper_coefficient',list_inf)
        $stdout.puts "CG,#{length},\"0..1\",#{aggregator.cg}"
        $stdout.puts "DCG,#{length},\"0..1\",#{aggregator.dcg}"
        $stdout.puts "DCG2,#{length},\"0..1\",#{aggregator.dcg2}"
        $stdout.puts "CG,#{length},inf,#{aggregator_inf.cg}"
        $stdout.puts "DCG,#{length},inf,#{aggregator_inf.dcg}"
        $stdout.puts "DCG2,#{length},inf,#{aggregator_inf.dcg2}"
      end

    end

    def create_dict
      puts "id,name"
      self.tx_store.int_2_name.each do |id,name|
        puts "#{id},#{name}"
      end
    end

    private

    def time_span(unit: 'years')
      t1 = self.tx_store.first.date
      t2 = self.tx_store.last.date
      TimeDifference.between(t1,t2).method('in_'+unit).call
    end

    def average_time_between_commits(unit: 'hours')
      if self.tx_store.size > 1
        t1 = self.tx_store.first.date
        t2 = self.tx_store.last.date
        total_time = TimeDifference.between(t1,t2).method('in_'+unit).call
        total_time.to_f/(self.tx_store.size-1)
      else
        raise Exception.new, "History only contained 1 or 0 transactions"
      end
    end
  end #class
end #module
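Note: several of the metrics above (avg_changes_per_file, avg_method_changes_per_parsable_file) group fine-grained change identifiers of the form file:method by their file component using the regexp /^(?<file>[^:]+?)(?::|$)/. The standalone Ruby sketch below replays just that grouping step on invented identifiers; it does not call the gem, and the sample names are purely illustrative.

# Sketch: group change identifiers by file, mirroring the grouping in avg_changes_per_file.
# The identifiers below are made-up examples, not data from the package.
changes = ['a.rb', 'a.rb:foo', 'a.rb:@residuals', 'b.rb:bar']

by_file = changes.group_by { |c| /^(?<file>[^:]+?)(?::|$)/.match(c)[:file] }
# => {"a.rb"=>["a.rb", "a.rb:foo", "a.rb:@residuals"], "b.rb"=>["b.rb:bar"]}

avg = (changes.size.to_f / by_file.size).round(2)
puts "files: #{by_file.size}, avg changes per file: #{avg}"  # files: 2, avg changes per file: 2.0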
data/lib/evoc/array.rb
ADDED
@@ -0,0 +1,43 @@
class Array
  def subset?(other)
    self & other == self
  end

  def include_any?(other)
    (self & other).size > 0
  end

  ##
  # returns the union of an array of arraya
  def array_union
    if union = self.inject(:|)
      return union
    else
      return []
    end
  end

  ##
  # returns the intersection of a list of lists
  def array_intersection
    if intersection = self.inject(:&)
      return intersection
    else
      return []
    end
  end

  ##
  # returns the list of items in self that was not in other
  def array_difference(other)
    self.map {|a| a - other}.array_union
  end

  def self.powerset(set)
    return [set] if set.empty?
    p = set.pop
    subset = powerset(set)
    subset | subset.map { |x| x | [p] }
  end

end
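These monkey patches treat plain arrays as sets. The sketch below shows the expected results as comments; it assumes the file above has been loaded (the require_relative path is illustrative, adjust it to wherever array.rb lives in your checkout).

require_relative 'lib/evoc/array'        # illustrative path

[1, 2].subset?([1, 2, 3])                # => true  ([1,2] & [1,2,3] == [1,2])
[1, 2].include_any?([2, 9])              # => true
[[1, 2], [2, 3]].array_union             # => [1, 2, 3]
[[1, 2], [2, 3]].array_intersection      # => [2]
[[1, 2], [2, 3]].array_difference([2])   # => [1, 3]
Array.powerset([1, 2])                   # => [[], [1], [2], [1, 2]] (destructively pops from its argument)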
data/lib/evoc/evaluate.rb
ADDED
@@ -0,0 +1,109 @@
module Evoc
  class Evaluate
    extend Logging


    def self.execute(recommendation,expected_outcome,evaluator)
      if match = /average_precision(?<num>\d+)?/.match(evaluator)
        if match[:num].nil?
          self.average_precision(recommendation,expected_outcome)
        else
          self.average_precision(recommendation,expected_outcome,n: match[:num].to_i)
        end
      elsif match = /top10_recall/.match(evaluator)
        self.top10_recall(recommendation,expected_outcome)
      else raise ArgumentError, "The evaluator you requested (#{evaluator}) has not been implemented in Evoc::Evaluate"
      end
    end

    # calculate the ratio of correct items in the top 10
    # @param [Array] recommendation a sorted array
    # @param [Array] expected_outcome an array of items
    # @return [Rational] the top10 recall
    def self.top10_recall(recommendation,expected_outcome)
      if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
      if (expected_outcome.size > 0) & !recommendation.empty?
        top10 = recommendation.take(10).flatten
        common_items = (expected_outcome & top10).size.to_r
        return common_items/expected_outcome.size
      else
        nil
      end
    end

    ##
    # calculate the average precision of the result based on an expected outcome
    # @param [Array] recommendation a sorted array
    # @param [Array] expected_outcome an array of items
    # @return [Float] the average precision
    def self.average_precision(recommendation,expected_outcome, n: recommendation.size)
      if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
      if (expected_outcome.size > 0) & !recommendation.empty?
        average_precision = 0
        correct_items = []
        total_items_considered = []
        # sort rules by weight
        # we first group rules with equal weights
        # and then sort the groups by weight
        recommendation.take(n).each do |items|
          if !items.is_a?(Array) then items = [items] end
          if items.first.class != expected_outcome.first.class
            raise ArgumentError, "Expected outcome was of type #{expected_outcome.first.class}, while the item in the recommendation was of type #{items.first.class}"
          end
          # skip already considered items
          if (new_items = items - total_items_considered).size > 0
            new_items.each {|item| total_items_considered << item}
            if correct_in_rule = (items & expected_outcome)
              if correct_in_rule.size > 0
                # make sure that the new items havent already been added earlier
                new_correct = (correct_in_rule - correct_items)
                # add new items
                new_correct.each {|item| correct_items << item}
                change_in_recall = new_correct.size.to_r/expected_outcome.size
                precision_at_k = correct_items.size.to_r/total_items_considered.size
                average_precision += (precision_at_k * change_in_recall)
              end
            end
          end
        end
        average_precision.to_f
      else
        nil
      end
    end

    # calculate the grouped average precision of the result based on an expected outcome
    def self.e_collected_average_precision(expected_outcome)
      if !expected_outcome.is_a?(Array) then expected_outcome = [expected_outcome] end
      if (expected_outcome.size > 0) & !self.empty?
        collected_average_precision = 0
        correct_items = []
        total_items_considered = []
        # sort rules by weight
        # we first group rules with equal weights
        # and then sort the groups by weight
        groups = self.group_by {|r| r.weight}.sort.reverse
        groups.each do |(_,rules)|
          items = rules.map(&:rhs).flatten.uniq
          if (new_items = items - total_items_considered).size > 0
            new_items.each {|item| total_items_considered << item}
            if correct_in_group = (items & expected_outcome)
              if correct_in_group.size > 0
                # make sure that the new items havent already been added earlier
                new_correct = (correct_in_group - correct_items)
                # add new items
                new_correct.each {|item| correct_items << item}
                change_in_recall = new_correct.size.to_r/expected_outcome.size
                precision_at_k = correct_items.size.to_r/total_items_considered.size
                collected_average_precision += (precision_at_k * change_in_recall)
              end
            end
          end
        end
        self.collected_average_precision = collected_average_precision.to_f
      else
        self.collected_average_precision = nil
      end
    end
  end
end
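The average_precision method above walks the ranked recommendation and accumulates precision@k times the change in recall each time a relevant item appears. The standalone Ruby sketch below re-derives that accumulation on a toy ranked list of flat items; it is a simplified illustration, not the gem's API, and the sample data is invented.

# Worked example of the precision@k * change-in-recall accumulation used above.
recommendation = ['a', 'x', 'b', 'y']   # ranked, best first (toy data)
expected       = ['a', 'b']

correct = []
ap = 0r
recommendation.each_with_index do |item, idx|
  next unless expected.include?(item)
  correct << item
  precision_at_k   = correct.size.to_r / (idx + 1)   # precision over the first idx+1 items
  change_in_recall = 1.to_r / expected.size          # one new relevant item found
  ap += precision_at_k * change_in_recall
end
puts ap.to_f   # => 0.8333..., matching what average_precision above computes for this flat input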