evoc 3.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/Makefile +4 -0
- data/README.md +61 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/evoc +3 -0
- data/bin/setup +7 -0
- data/evoc.gemspec +30 -0
- data/lib/evoc/algorithm.rb +147 -0
- data/lib/evoc/algorithms/top_k.rb +86 -0
- data/lib/evoc/analyze.rb +395 -0
- data/lib/evoc/array.rb +43 -0
- data/lib/evoc/evaluate.rb +109 -0
- data/lib/evoc/exceptions/aggregation_error.rb +6 -0
- data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
- data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
- data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_result.rb +6 -0
- data/lib/evoc/exceptions/non_finite.rb +8 -0
- data/lib/evoc/exceptions/non_numeric.rb +8 -0
- data/lib/evoc/exceptions/not_a_query.rb +6 -0
- data/lib/evoc/exceptions/not_a_result.rb +6 -0
- data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
- data/lib/evoc/exceptions/not_initialized.rb +6 -0
- data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
- data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
- data/lib/evoc/experiment.rb +239 -0
- data/lib/evoc/hash.rb +56 -0
- data/lib/evoc/history_store.rb +53 -0
- data/lib/evoc/hyper_rule.rb +53 -0
- data/lib/evoc/interestingness_measure.rb +77 -0
- data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
- data/lib/evoc/interestingness_measures.rb +882 -0
- data/lib/evoc/logger.rb +34 -0
- data/lib/evoc/memory_profiler.rb +43 -0
- data/lib/evoc/recommendation_cache.rb +152 -0
- data/lib/evoc/rule.rb +32 -0
- data/lib/evoc/rule_store.rb +340 -0
- data/lib/evoc/scenario.rb +303 -0
- data/lib/evoc/svd.rb +124 -0
- data/lib/evoc/tx.rb +34 -0
- data/lib/evoc/tx_store.rb +379 -0
- data/lib/evoc/version.rb +3 -0
- data/lib/evoc.rb +4 -0
- data/lib/evoc_cli/analyze.rb +198 -0
- data/lib/evoc_cli/cli_helper.rb +1 -0
- data/lib/evoc_cli/experiment.rb +78 -0
- data/lib/evoc_cli/info.rb +22 -0
- data/lib/evoc_cli/main.rb +29 -0
- data/lib/evoc_cli/util.rb +36 -0
- data/lib/evoc_helper.rb +40 -0
- data/mem_profiler/Gemfile.lock +39 -0
- data/mem_profiler/README.md +126 -0
- data/mem_profiler/createdb.rb +4 -0
- data/mem_profiler/db.rb +82 -0
- data/mem_profiler/gemfile +6 -0
- data/mem_profiler/gencsv.rb +64 -0
- data/mem_profiler/genimport.sh +8 -0
- data/mem_profiler/graph.rb +91 -0
- metadata +251 -0
data/lib/evoc/analyze.rb
ADDED
@@ -0,0 +1,395 @@
|
|
1
|
+
module Evoc
|
2
|
+
class Analyze
|
3
|
+
|
4
|
+
attr_accessor :opts, :tx_store
|
5
|
+
|
6
|
+
# Builds an analyzer from an options hash.
#
# Sets the log level from opts[:logger_level] and stores the transaction
# store found in opts[:tx_store]. When that entry is nil, a new TxStore is
# created from opts[:transactions], opts[:case_id] and opts[:granularity].
#
# @param [Hash] opts the analysis options
def initialize(opts)
  self.opts = opts
  Logging.set_level(self.opts[:logger_level])
  supplied_store = opts[:tx_store]
  self.tx_store =
    if supplied_store.nil?
      TxStore.new(path: opts[:transactions], case_id: self.opts[:case_id], granularity: self.opts[:granularity])
    else
      supplied_store
    end
end
|
15
|
+
|
16
|
+
##
# Runs all of the numerical analyses
#
# Prints two CSV rows to stdout: a header row holding the analysis names,
# followed by one row holding the result of each analysis in the same order
def all
  analysis_names = %w[num_commits percent_method_changes_of_all_changes avg_changes_per_file avg_method_changes_per_parsable_file num_unique_changes average_changes_per_commit average_commits_per_specific_change time_span_in_years average_time_between_commits_in_minutes]

  CSV { |out| out << analysis_names }
  # each name doubles as the name of the method that computes the value
  outcomes = analysis_names.map { |name| self.method(name).call }
  CSV { |out| out << outcomes }
end
|
30
|
+
|
31
|
+
##
# Averages the number of method-level changes over the changed files that
# contain at least one method-level change.
#
# Within each transaction the item names are grouped by their file part (the
# text before the first ':'). An item counts as a method-level change when
# its name contains a ':' that is not followed by '@residuals'.
#
# @return [Float] the average rounded to two decimals, or 0.0 when no file
#   with a method-level change was found (instead of dividing by zero)
def avg_method_changes_per_parsable_file
  parsable_files_changed = 0
  method_changes = 0
  tx_store.each do |tx|
    named_items = tx.items.map { |i| tx_store.int_2_name[i] }
    # group changed items by file name
    named_items.group_by { |name| /^(?<file>[^:]+?)(?::|$)/.match(name)[:file] }.each do |_file, changes|
      changes_in_file = changes.count { |change| change =~ /:(?!@residuals)/ }
      # files without any method-level change are not counted as parsable
      next if changes_in_file.zero?
      parsable_files_changed += 1
      method_changes += changes_in_file
    end
  end
  return 0.0 if parsable_files_changed.zero?
  (method_changes.to_f / parsable_files_changed).round(2)
end
|
52
|
+
|
53
|
+
##
# Averages the number of changed items per changed file.
#
# Within each transaction the item names are grouped by their file part (the
# text before the first ':'); every group counts as one changed file.
#
# @return [Float] the average rounded to two decimals, or 0.0 when no file
#   was changed at all (instead of dividing by zero)
def avg_changes_per_file
  files_changed = 0
  total_changes = 0
  tx_store.each do |tx|
    named_items = tx.items.map { |i| tx_store.int_2_name[i] }
    changes_by_file = named_items.group_by { |name| /^(?<file>[^:]+?)(?::|$)/.match(name)[:file] }
    files_changed += changes_by_file.size
    # every item lands in exactly one group, so the group sizes add up to
    # the number of items in the transaction
    total_changes += named_items.size
  end
  return 0.0 if files_changed.zero?
  (total_changes.to_f / files_changed).round(2)
end
|
65
|
+
|
66
|
+
##
# Computes how large a share of all changed items are method-level changes.
#
# An item is treated as a file-level change when its name has no ':' part or
# ends in ':@residuals'; every other item counts as a method-level change.
#
# @return [Float] the percentage of changes that are method level, rounded
#   to two decimals, or 0.0 when the store holds no changes at all (instead
#   of dividing by zero)
def percent_method_changes_of_all_changes
  total_changes = 0
  file_changes = 0
  tx_store.each do |tx|
    total_changes += tx.items.size
    file_changes += tx.items.count do |item|
      tx_store.int_2_name[item] =~ /^[^:]+?(\.[^:\/]+?)?(?::@residuals$|$)/
    end
  end
  return 0.0 if total_changes.zero?
  ((1 - file_changes.to_f / total_changes) * 100).round(2)
end
|
81
|
+
|
82
|
+
# @return the number of transactions, as reported by the size of the
#   transaction store
def num_commits
  commit_count = tx_store.size
  commit_count
end
|
85
|
+
|
86
|
+
##
# Averages, over all items known to the transaction store, the number of
# transactions recorded for each item.
#
# @return [Float] the average rounded to two decimals, or 0.0 when the store
#   has no items (instead of dividing by zero)
def average_commits_per_specific_change
  txes_per_item = tx_store.items.values
  return 0.0 if txes_per_item.empty?
  occurrences = txes_per_item.inject(0) { |sum, txes| sum + txes.size }
  (occurrences.to_f / txes_per_item.size).round(2)
end
|
89
|
+
|
90
|
+
##
# Prints a CSV formatted table of the top N most frequent items
# N is configured in opts[:top]
#
# The frequency of an item is the number of transactions the store lists
# for it; the rows are printed to stdout, most frequent first
def file_frequency
  # print header
  CSV { |out| out << %W(file frequency) }
  counts = tx_store.items.map { |item, txes| [item, txes.size] }
  ranked = counts.sort_by { |_item, count| -count }
  ranked.take(opts[:top]).each do |item, count|
    # items are stored as ids, translate back to the name before printing
    name = tx_store.int_2_name[item]
    CSV { |out| out << [name, count] }
  end
end
|
103
|
+
|
104
|
+
##
# Dumps the commit sizes to stdout
#
# With opts[:group] set, prints one 'size,frequency' line per distinct
# commit size in ascending size order. Otherwise prints the size of every
# commit, one per line, while reporting progress on stderr.
#
# All data lines are written through $stdout, so that header and rows end
# up in the same place when $stdout has been redirected.
def commit_size
  if opts[:group]
    $stdout.puts 'commit_size,frequency'
    tx_store.group_by(&:size).sort.each do |size, txes|
      $stdout.puts "#{size},#{txes.size}"
    end
  else
    $stdout.puts 'commit_size'
    total = tx_store.size
    tx_store.each_with_index do |tx, index|
      # the trailing \r keeps the progress report on a single terminal line
      $stderr.print "Dumping commit sizes: #{index+1} of #{total} \r"
      $stdout.puts tx.size
    end
  end
  $stderr.puts "DONE "
end
|
121
|
+
|
122
|
+
# @return [Integer] the number of distinct items found across the items of
#   all transactions in the store
def num_unique_changes
  all_items = tx_store.map { |tx| tx.items }.flatten
  all_items.uniq.size
end
|
125
|
+
|
126
|
+
##
# Averages the sizes of the transactions in the store.
#
# @return [Float] the average rounded to two decimals, or 0.0 when the store
#   holds no transactions (instead of dividing by zero)
def average_changes_per_commit
  sizes = tx_store.map(&:size)
  return 0.0 if sizes.empty?
  total = sizes.inject(0) { |sum, size| sum + size }
  (total.to_f / sizes.size).round(2)
end
|
130
|
+
|
131
|
+
# added but not used as the data is not interesting --LM
#
# Computes the median of the transaction sizes in the store; for an even
# number of transactions this is the mean of the two middle sizes.
#
# @return [Float] the median size, or 0.0 when the store holds no
#   transactions (there are no middle elements to read in that case)
def median_changes_per_commit
  sorted = tx_store.map(&:size).sort
  return 0.0 if sorted.empty?
  len = sorted.length
  # both indexes point at the same element when len is odd
  (sorted[(len - 1) / 2] + sorted[len / 2]) / 2.0
end
|
138
|
+
|
139
|
+
# @return the result of time_span with the unit fixed to 'years'
def time_span_in_years
  unit = 'years'
  time_span(unit: unit)
end
|
142
|
+
|
143
|
+
# @return the result of average_time_between_commits with the unit fixed to
#   'minutes', rounded to two decimals
def average_time_between_commits_in_minutes
  minutes = average_time_between_commits(unit: 'minutes')
  minutes.round(2)
end
|
146
|
+
|
147
|
+
|
148
|
+
##
# Counts how often all-but-one-item queries hit the preceding history.
#
# For every transaction, each combination of (tx.size - 1) of its items is
# used as a query against the part of the store that
# clone_with_subset(0, tx.index) returns. The number of transactions
# returned by transactions_of_list(query, true) is the hit count of that
# query.
#
# @return [Hash] maps a hit count to the number of queries with that count
def uniqueness
  queries_per_hit_count = {}

  tx_store.each do |tx|
    candidate_queries = tx.items.combination(tx.size - 1).to_a
    earlier_history = tx_store.clone_with_subset(0, tx.index)

    candidate_queries.each do |query|
      hit_count = earlier_history.transactions_of_list(query, true).size
      seen_so_far = queries_per_hit_count[hit_count]
      queries_per_hit_count[hit_count] = seen_so_far.nil? ? 1 : seen_so_far + 1
    end
  end

  queries_per_hit_count
end
|
169
|
+
|
170
|
+
##
# Prints the measure values of randomly generated rules as CSV on stdout.
#
# opts[:number] rules are generated. Each one is built from a randomly
# picked transaction with more than one item: a random non-empty proper
# subset of its items forms the lhs and one of the remaining items the rhs.
# The header row lists all measure names of Evoc::Rule in sorted order.
def measure_values
  # only transactions with at least two items can be split into lhs and rhs
  candidates = tx_store.select { |tx| tx.size > 1 }
  measures = (Evoc::Rule.p_measures + Evoc::Rule.measures).sort
  CSV { |out| out << measures } # print header
  opts[:number].times do
    source_tx = candidates.sample
    lhs = source_tx.items.sample(rand(1..source_tx.size-1))
    rhs = (source_tx.items - lhs).sample
    rule = Evoc::Rule.new(lhs: lhs, rhs: rhs, tx_store: tx_store)
    row = CSV::Row.new([], [], false)
    measures.each do |m|
      measure = rule.get_measure(m)
      # a measure is either an InterestingnessMeasure wrapping the value or
      # the plain value itself
      value = measure.is_a?(Evoc::InterestingnessMeasure) ? measure.value : measure
      row << (value ? value.to_f : nil)
    end
    CSV { |out| out << row } # print row
  end
end
|
192
|
+
|
193
|
+
##
# Prints, for every measure of Evoc::Rule, the smallest and largest value
# observed over a fixed set of probability configurations.
#
# Each configuration is a triple P(A), P(B), P(A,B) that is set on an
# otherwise empty rule. The CSV output on stdout has two header rows
# followed by one row per measure with: the measure name, the declared
# range, the observed min and max, and the configurations that produced
# the min and the max.
#
# Values that are nil/false or NaN are ignored. A measure for which no
# usable value was obtained is left out of the output.
def measure_ranges
  # NOTE: an exhaustive sweep over a grid of P(A), P(B), P(A,B) values was
  # sketched here earlier (as commented-out code); the hand-picked boundary
  # configurations below are used instead.
  configurations = [
    # A and B never change together, but do change
    [1.to_r/4, 1.to_r/4, 0],
    # A and B never change together, and A almost never change
    [1.to_r/1000000, 1.to_r/2, 0],
    # A and B never change together, and B almost never change
    [1.to_r/2, 1.to_r/1000000, 0],
    # A and B never change together, but change half of the time
    [1.to_r/2, 1.to_r/2, 0],
    # A and B never change together, and A and B almost never change
    [1.to_r/1000000, 1.to_r/1000000, 0],
    # A and B always change together
    [1.to_r/3, 1.to_r/3, 1.to_r/3],
    # A and B always change together, but rarely change
    [1.to_r/1000000, 1.to_r/1000000, 1.to_r/1000000],
    # A and B always change together, always
    [1, 1, 1],
    # B always change when A change, but rarely change
    [1, 1.to_r/10000000, 1.to_r/10000000],
    # A always change when B change, but rarely change
    [1.to_r/10000000, 1, 1.to_r/10000000],
    # A and B sometimes change together, B always
    [1.to_r/2, 1, 1.to_r/2],
    # A and B sometimes change together, A always
    [1, 1.to_r/2, 1.to_r/2],
    # A and B always change together, almost always
    [0.99999999.rationalize, 0.99999999.rationalize, 0.99999999.rationalize],
    # A and B always change together, half of the time
    [0.5.rationalize, 0.5.rationalize, 0.5.rationalize],
    # A and B sometimes change together, with equal probabilities for A and B
    [0.4.rationalize, 0.4.rationalize, 0.1.rationalize],
    # A and B sometimes change together, B more often than A
    [1.to_r/3, 1.to_r/2, 1.to_r/4]
  ]

  rules = configurations.map do |p_a, p_b, p_ab|
    rule = Evoc::Rule.new(lhs: [], rhs: [])
    rule.set_p('p_A', p_a)
    rule.set_p('p_B', p_b)
    rule.set_p('p_AB', p_ab)
    rule
  end

  measures = (Evoc::Rule.p_measures + Evoc::Rule.measures).sort
  CSV { |out| out << [nil,nil,nil,nil,'min_configuration',nil,nil,'max_configuration',nil,nil] }
  CSV { |out| out << ['measure','current_range','min','max','P(A)','P(B)','P(A,B)','P(A)','P(B)','P(A,B)'] } # print header
  measures.each do |m|
    values = {}
    rules.each do |rule|
      measure = rule.get_measure(m)
      value = measure.is_a?(Evoc::InterestingnessMeasure) ? measure.value : measure
      next unless value
      numeric = value.to_f
      # NaN is unequal to everything, itself included, so it can only be
      # detected with nan? and never with a != comparison
      next if numeric.nan?
      a = rule.p_A
      b = rule.p_B
      ab = rule.p_AB
      values["#{a},#{b},#{ab}"] = { m: measure, v: numeric, a: a, b: b, ab: ab }
    end
    # nothing to report when no configuration produced a usable value
    next if values.empty?
    min = values.min_by { |_key, entry| entry[:v] }[1]
    max = values.max_by { |_key, entry| entry[:v] }[1]
    current_range = (min[:m].is_a?(Evoc::InterestingnessMeasure) ? "[#{Evoc::InterestingnessMeasures.get_min(m)},#{Evoc::InterestingnessMeasures.get_max(m)}]" : "[0,1]")
    CSV { |out| out << [m,current_range,min[:v],max[:v],min[:a],min[:b],min[:ab],max[:a],max[:b],max[:ab]] } # print row
  end
end
|
349
|
+
|
350
|
+
##
# Prints the cg, dcg and dcg2 results of InterestingnessMeasureAggregator
# for random, descendingly sorted value lists of length 1 to 1000.
#
# For every length two lists are aggregated: one with floats drawn as
# 1 - rand(0.00001..1), aggregated as 'm_support' and labelled "0..1", and
# one with integers drawn from 0..10, aggregated as 'm_hyper_coefficient'
# and labelled inf. The output on stdout is CSV with the columns
# aggregator,length,range,y.
def aggregator_range
  $stdout.puts 'aggregator,length,range,y'
  (1..1000).each do |length|
    list = Array.new(length) { 1 - rand(0.00001..1) }.sort.reverse
    list_inf = Array.new(length) { rand(0..10) }.sort.reverse
    aggregator = Evoc::InterestingnessMeasureAggregator.new('m_support', list)
    aggregator_inf = Evoc::InterestingnessMeasureAggregator.new('m_hyper_coefficient', list_inf)
    $stdout.puts "CG,#{length},\"0..1\",#{aggregator.cg}"
    $stdout.puts "DCG,#{length},\"0..1\",#{aggregator.dcg}"
    $stdout.puts "DCG2,#{length},\"0..1\",#{aggregator.dcg2}"
    $stdout.puts "CG,#{length},inf,#{aggregator_inf.cg}"
    $stdout.puts "DCG,#{length},inf,#{aggregator_inf.dcg}"
    $stdout.puts "DCG2,#{length},inf,#{aggregator_inf.dcg2}"
  end
end
|
367
|
+
|
368
|
+
##
# Prints the id to name dictionary of the transaction store to stdout, as
# an "id,name" header followed by one "id,name" line per entry
def create_dict
  $stdout.puts "id,name"
  tx_store.int_2_name.each { |id, name| $stdout.puts "#{id},#{name}" }
end
|
374
|
+
|
375
|
+
private
|
376
|
+
|
377
|
+
##
# Measures the time between the dates of the first and the last transaction
# in the store.
#
# @param [String] unit appended to 'in_' to select the TimeDifference
#   method that converts the difference (e.g. 'years' selects in_years)
# @return whatever the selected TimeDifference method returns
def time_span(unit: 'years')
  first_date = tx_store.first.date
  last_date = tx_store.last.date
  difference = TimeDifference.between(first_date, last_date)
  difference.method('in_' + unit).call
end
|
382
|
+
|
383
|
+
##
# Averages the time between consecutive transactions: the time between the
# first and the last transaction divided by the number of gaps between
# transactions (size - 1).
#
# @param [String] unit the unit handed on to time_span (e.g. 'hours')
# @return [Float] the average time between two transactions in that unit
# @raise [RuntimeError] when the store holds fewer than two transactions.
#   A StandardError subclass is raised rather than a bare Exception so that
#   an ordinary `rescue => e` catches it; handlers that rescue Exception
#   keep working.
def average_time_between_commits(unit: 'hours')
  unless tx_store.size > 1
    raise "History only contained 1 or 0 transactions"
  end
  total_time = time_span(unit: unit)
  total_time.to_f / (tx_store.size - 1)
end
|
393
|
+
end #class
|
394
|
+
end #module
|
395
|
+
|
data/lib/evoc/array.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# Set-like helpers added to the core Array class.
class Array
  ##
  # @param [Array] other the candidate superset
  # @return [Boolean] true when every element of self is also found in
  #   other; repeated elements in self do not affect the answer
  def subset?(other)
    (self - other).empty?
  end

  ##
  # @param [Array] other
  # @return [Boolean] true when self and other share at least one element
  def include_any?(other)
    !(self & other).empty?
  end

  ##
  # returns the union of an array of arrays
  # ([] when self is empty)
  def array_union
    inject(:|) || []
  end

  ##
  # returns the intersection of a list of lists
  # ([] when self is empty)
  def array_intersection
    inject(:&) || []
  end

  ##
  # returns the list of items in the lists of self that was not in other
  def array_difference(other)
    map { |list| list - other }.array_union
  end

  ##
  # Builds all subsets of the given list without modifying it.
  #
  # The subsets are built up one item at a time: each item is added to
  # every subset found so far, and the new subsets are appended.
  #
  # @param [Array] set the items to build subsets from
  # @return [Array<Array>] all subsets, starting with the empty one
  def self.powerset(set)
    set.inject([[]]) do |subsets, item|
      subsets | subsets.map { |subset| subset | [item] }
    end
  end
end
|
data/lib/evoc/evaluate.rb
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
module Evoc
|
2
|
+
class Evaluate
|
3
|
+
extend Logging
|
4
|
+
|
5
|
+
|
6
|
+
# Dispatches to the evaluator selected by name.
#
# A name containing 'average_precision' selects average_precision; digits
# directly after it (e.g. 'average_precision10') are passed on as n.
# A name containing 'top10_recall' selects top10_recall.
#
# @param [Array] recommendation a sorted array
# @param [Array] expected_outcome an array of items
# @param [String] evaluator the name of the evaluator
# @raise [ArgumentError] when the name matches no implemented evaluator
def self.execute(recommendation,expected_outcome,evaluator)
  precision_request = /average_precision(?<num>\d+)?/.match(evaluator)
  if precision_request
    cutoff = precision_request[:num]
    return self.average_precision(recommendation, expected_outcome) if cutoff.nil?
    return self.average_precision(recommendation, expected_outcome, n: cutoff.to_i)
  end
  return self.top10_recall(recommendation, expected_outcome) if /top10_recall/.match(evaluator)
  raise ArgumentError, "The evaluator you requested (#{evaluator}) has not been implemented in Evoc::Evaluate"
end
|
18
|
+
|
19
|
+
# calculate the share of the expected items that are found in the top 10
# entries of the recommendation
# @param [Array] recommendation a sorted array
# @param [Array] expected_outcome an array of items (a single item is
#   wrapped in an array)
# @return [Rational] the top10 recall, or nil when the expected outcome or
#   the recommendation is empty
def self.top10_recall(recommendation,expected_outcome)
  expected_outcome = [expected_outcome] unless expected_outcome.is_a?(Array)
  has_expectation = expected_outcome.size > 0
  has_recommendation = !recommendation.empty?
  return nil unless has_expectation && has_recommendation

  leading_items = recommendation.take(10).flatten
  hits = (expected_outcome & leading_items).size
  hits.to_r / expected_outcome.size
end
|
33
|
+
|
34
|
+
##
# calculate the average precision of the result based on an expected outcome
#
# The first n entries of the recommendation are walked in order. Whenever
# an entry brings in expected items that were not found before, the
# precision at that point (found items / distinct items considered so far)
# is weighted by the gain in recall and added to the result. Entries that
# bring in no new items are skipped.
#
# @param [Array] recommendation a sorted array
# @param [Array] expected_outcome an array of items (a single item is
#   wrapped in an array)
# @param [Integer] n how many leading entries of the recommendation to use
# @return [Float] the average precision, or nil when the expected outcome
#   or the recommendation is empty
# @raise [ArgumentError] when the items of an entry are of another class
#   than the expected items
def self.average_precision(recommendation,expected_outcome, n: recommendation.size)
  expected_outcome = [expected_outcome] unless expected_outcome.is_a?(Array)
  has_expectation = expected_outcome.size > 0
  has_recommendation = !recommendation.empty?
  return nil unless has_expectation && has_recommendation

  score = 0
  found = []
  seen = []
  recommendation.take(n).each do |entry|
    entry = [entry] unless entry.is_a?(Array)
    if entry.first.class != expected_outcome.first.class
      raise ArgumentError, "Expected outcome was of type #{expected_outcome.first.class}, while the item in the recommendation was of type #{entry.first.class}"
    end
    # skip already considered items
    unseen = entry - seen
    next if unseen.empty?
    seen.concat(unseen)
    relevant = entry & expected_outcome
    next if relevant.empty?
    # make sure that the new items havent already been added earlier
    newly_found = relevant - found
    found.concat(newly_found)
    recall_gain = newly_found.size.to_r / expected_outcome.size
    precision_here = found.size.to_r / seen.size
    score += precision_here * recall_gain
  end
  score.to_f
end
|
74
|
+
|
75
|
+
# calculate the grouped average precision of the result based on an expected outcome
#
# The rules yielded by self.group_by are grouped by weight and the groups
# are visited from the highest weight down. The distinct rhs items of a
# group are treated as one entry: whenever a group brings in expected items
# that were not found before, the precision at that point is weighted by
# the gain in recall and added to the result. The result is stored through
# self.collected_average_precision= (nil when the expected outcome or self
# is empty).
#
# NOTE(review): this is defined with `def self.`, yet it calls self.empty?,
# self.group_by and self.collected_average_precision=, none of which are
# defined in the visible part of this file - confirm whether they are
# provided elsewhere or whether this method was carried over from a
# collection class and is unused here.
#
# @param [Array] expected_outcome an array of items (a single item is
#   wrapped in an array)
def self.e_collected_average_precision(expected_outcome)
  expected_outcome = [expected_outcome] unless expected_outcome.is_a?(Array)
  has_expectation = expected_outcome.size > 0
  has_rules = !self.empty?
  if has_expectation && has_rules
    score = 0
    found = []
    seen = []
    # group rules with equal weights, then order the groups by weight,
    # highest first
    weight_groups = self.group_by { |rule| rule.weight }.sort.reverse
    weight_groups.each do |(_, rules)|
      group_items = rules.map(&:rhs).flatten.uniq
      unseen = group_items - seen
      next if unseen.empty?
      seen.concat(unseen)
      relevant = group_items & expected_outcome
      next if relevant.empty?
      # make sure that the new items havent already been added earlier
      newly_found = relevant - found
      found.concat(newly_found)
      recall_gain = newly_found.size.to_r / expected_outcome.size
      precision_here = found.size.to_r / seen.size
      score += precision_here * recall_gain
    end
    self.collected_average_precision = score.to_f
  else
    self.collected_average_precision = nil
  end
end
|
108
|
+
end
|
109
|
+
end
|