evoc 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/Makefile +4 -0
- data/README.md +61 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/evoc +3 -0
- data/bin/setup +7 -0
- data/evoc.gemspec +30 -0
- data/lib/evoc/algorithm.rb +147 -0
- data/lib/evoc/algorithms/top_k.rb +86 -0
- data/lib/evoc/analyze.rb +395 -0
- data/lib/evoc/array.rb +43 -0
- data/lib/evoc/evaluate.rb +109 -0
- data/lib/evoc/exceptions/aggregation_error.rb +6 -0
- data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
- data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
- data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_result.rb +6 -0
- data/lib/evoc/exceptions/non_finite.rb +8 -0
- data/lib/evoc/exceptions/non_numeric.rb +8 -0
- data/lib/evoc/exceptions/not_a_query.rb +6 -0
- data/lib/evoc/exceptions/not_a_result.rb +6 -0
- data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
- data/lib/evoc/exceptions/not_initialized.rb +6 -0
- data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
- data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
- data/lib/evoc/experiment.rb +239 -0
- data/lib/evoc/hash.rb +56 -0
- data/lib/evoc/history_store.rb +53 -0
- data/lib/evoc/hyper_rule.rb +53 -0
- data/lib/evoc/interestingness_measure.rb +77 -0
- data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
- data/lib/evoc/interestingness_measures.rb +882 -0
- data/lib/evoc/logger.rb +34 -0
- data/lib/evoc/memory_profiler.rb +43 -0
- data/lib/evoc/recommendation_cache.rb +152 -0
- data/lib/evoc/rule.rb +32 -0
- data/lib/evoc/rule_store.rb +340 -0
- data/lib/evoc/scenario.rb +303 -0
- data/lib/evoc/svd.rb +124 -0
- data/lib/evoc/tx.rb +34 -0
- data/lib/evoc/tx_store.rb +379 -0
- data/lib/evoc/version.rb +3 -0
- data/lib/evoc.rb +4 -0
- data/lib/evoc_cli/analyze.rb +198 -0
- data/lib/evoc_cli/cli_helper.rb +1 -0
- data/lib/evoc_cli/experiment.rb +78 -0
- data/lib/evoc_cli/info.rb +22 -0
- data/lib/evoc_cli/main.rb +29 -0
- data/lib/evoc_cli/util.rb +36 -0
- data/lib/evoc_helper.rb +40 -0
- data/mem_profiler/Gemfile.lock +39 -0
- data/mem_profiler/README.md +126 -0
- data/mem_profiler/createdb.rb +4 -0
- data/mem_profiler/db.rb +82 -0
- data/mem_profiler/gemfile +6 -0
- data/mem_profiler/gencsv.rb +64 -0
- data/mem_profiler/genimport.sh +8 -0
- data/mem_profiler/graph.rb +91 -0
- metadata +251 -0
@@ -0,0 +1,379 @@
|
|
1
|
+
module Evoc
|
2
|
+
class TxStore
|
3
|
+
include Enumerable,Logging
|
4
|
+
attr_reader :txes, :items, :tx_index_mapping
|
5
|
+
attr_accessor :name_2_int, :int_2_name
|
6
|
+
|
7
|
+
# CLASS VARIABLES
|
8
|
+
@@case_id = nil
|
9
|
+
|
10
|
+
# Creates an empty store and, when +path+ is given, loads that history file
# straight away.
#
# @param path [String, nil] history file handed to #load_transactions
# @param case_id [Object, nil] remembered class-wide; only the first non-nil
#   value ever supplied is kept
# @param granularity [String] forwarded to #load_transactions
def initialize(path: nil, case_id: nil, granularity: 'mixed')
  sparse = Evoc::Env::GOOGLE_HASH

  @txes = []
  # item id => transactions that changed the item (filled by #<<)
  @items = sparse ? GoogleHashSparseIntToRuby.new : {}
  # tx.index => position of that tx inside @txes
  @tx_index_mapping = sparse ? GoogleHashSparseIntToInt.new : {}
  # two-way dictionary between item names and their integer ids
  @name_2_int = sparse ? GoogleHashSparseRubyToInt.new : {}
  @int_2_name = sparse ? GoogleHashSparseIntToRuby.new : {}

  load_transactions(path: path, granularity: granularity) unless path.nil?

  @@case_id ||= case_id
end
|
27
|
+
|
28
|
+
|
29
|
+
##
|
30
|
+
# self << tx
|
31
|
+
#
|
32
|
+
# adds <tx> and updates @items with which items are changed in which tx
|
33
|
+
# Appends +tx+ to the store.
#
# A tx without an index gets the current store size as its index. Unless every
# item is already an Integer, each item is replaced by its integer id (new
# names are added to the name <-> id dictionaries). Finally the item => txs
# table and the index => position table are updated.
#
# NOTE(review): the return value is the internal @txes array, not the store,
# so chaining (`store << a << b`) would append b to the raw array — confirm
# no caller chains.
#
# @raise [Evoc::Exceptions::NotATransaction] if +tx+ does not respond to #items
def <<(tx)
  raise Evoc::Exceptions::NotATransaction.new(tx) unless tx.respond_to?(:items)

  tx.index = self.size if tx.index.nil?

  unless tx.items.all? { |entry| entry.is_a?(Integer) }
    tx.items = tx.items.map do |name|
      unless self.name_2_int.key?(name)
        fresh_id = self.name_2_int.size
        self.name_2_int[name] = fresh_id
        self.int_2_name[fresh_id] = name
      end
      self.name_2_int[name]
    end
  end

  tx.items.each do |item_id|
    if @items.key?(item_id)
      @items[item_id] << tx
    else
      @items[item_id] = [tx]
    end
  end

  @tx_index_mapping[tx.index] = @txes.size
  @txes << tx
end
|
69
|
+
|
70
|
+
# implementing #each gives us access to all Enumerable methods
|
71
|
+
# select, find_all etc
|
72
|
+
# Yields every transaction in storage order; this is what makes the
# Enumerable methods (select, find, ...) available on the store.
#
# Without a block an Enumerator is returned. The previous implementation fell
# through to `yield` in that case and raised LocalJumpError.
#
# @return [Array, Enumerator] the internal tx array when a block is given
def each(&block)
  return enum_for(:each) { @txes.size } unless block_given?

  @txes.each(&block)
end
|
81
|
+
|
82
|
+
# Returns the first stored transaction, or the first +n+ when +n+ is given.
#
# This method overrides Enumerable#first, so it accepts the same optional
# count; previously `store.first(3)` raised ArgumentError.
#
# @param n [Integer, nil] number of transactions to return
# @return [Object, Array, nil] a single tx (nil when empty) or an array of txs
def first(n = nil)
  n.nil? ? @txes.first : @txes.first(n)
end
|
85
|
+
|
86
|
+
# Returns the last stored transaction, or the last +n+ when +n+ is given
# (same optional count as Array#last).
#
# @param n [Integer, nil] number of transactions to return
# @return [Object, Array, nil] a single tx (nil when empty) or an array of txs
def last(n = nil)
  n.nil? ? @txes.last : @txes.last(n)
end
|
89
|
+
|
90
|
+
# Positional access into the stored transactions. +index+ is the position in
# the internal array (not tx.index); anything Array#[] accepts as a single
# argument works.
def [](index)
  @txes.slice(index)
end
|
93
|
+
|
94
|
+
def to_s
|
95
|
+
history = ""
|
96
|
+
self.txes.reverse.each do |tx|
|
97
|
+
history << tx.items.map {|i| self.int_2_name[i] }.join(',') + "\n"
|
98
|
+
end
|
99
|
+
history
|
100
|
+
end
|
101
|
+
|
102
|
+
##
|
103
|
+
# clear out the currently loaded transactions
|
104
|
+
# Drops every loaded transaction together with the lookup tables derived from
# them. The name <-> id dictionaries are kept.
#
# The index => position table is now cleared too. Leaving it populated let
# #get_tx resolve a stale index to whatever tx later occupied that position.
# (assumes the GoogleHash variants respond to #clear like @items already must
# — TODO confirm)
#
# @return [Hash] the (now empty) item table, as before
def clear
  @tx_index_mapping.clear
  @txes.clear
  @items.clear
end
|
108
|
+
|
109
|
+
|
110
|
+
##
|
111
|
+
# Retrieve a transaction using the given identifier
|
112
|
+
#
|
113
|
+
# Looks up a single transaction.
#
# @param id [Integer, Object] the identifier to search for
# @param id_type [Symbol] :index to match tx.index (via the index table),
#   :id to match tx.id (linear scan)
# @return [Object] the matching transaction
# @raise [ArgumentError] if +id+ is not an Integer for an :index lookup, or if
#   nothing matches (this includes an unknown +id_type+)
def get_tx(id:, id_type: :index)
  found =
    case id_type
    when :index
      # Integer replaces Fixnum, which no longer exists on Ruby >= 3.2.
      # The message text is kept unchanged for callers that match on it.
      raise ArgumentError, "Index must be a Fixnum, #{id} was #{id.class}" unless id.is_a?(Integer)

      position = @tx_index_mapping[id]
      position ? @txes[position] : nil
    when :id
      @txes.find { |candidate| candidate.id == id }
    end

  raise ArgumentError, "No transaction with #{id_type} #{id}" if found.nil?

  found
end
|
130
|
+
|
131
|
+
# Given an item, find those transactions
|
132
|
+
# where the item has been modified
|
133
|
+
# parameters:
|
134
|
+
# item: the item to check
|
135
|
+
# identifier: how to represent the found transactions, either using :index or :id
|
136
|
+
# Finds the transactions in which +item+ was changed.
#
# @param item [Object] key into the item table
# @param identifier [Symbol] method called on each hit to represent it
#   (e.g. :index or :id)
# @return [Array] one identifier per matching tx; empty when the store is
#   empty or the item is unknown
def transactions_of(item, identifier: :index)
  hits = []
  hits = self.items[item] if !self.size.zero? && self.items.key?(item)
  hits.map(&identifier)
end
|
148
|
+
|
149
|
+
##
|
150
|
+
# Returns the relevant transactions of the query
|
151
|
+
# That is: all the transactions where at least one
|
152
|
+
# item from the query were changed
|
153
|
+
#
|
154
|
+
# parameters:
|
155
|
+
# query: a list of items
|
156
|
+
# (optional) strict: if set to true, all the items of the query has had
|
157
|
+
# to be changed in the transaction for it to be included
|
158
|
+
# Collects the transactions touching the given items.
#
# @param items [Array] the items to look up
# @param strict [Boolean] when true the per-item results are combined with
#   array_intersection, otherwise with array_union
# @param identifier [Symbol] forwarded to #transactions_of
def transactions_of_list(items, strict: false, identifier: :index)
  per_item = items.map { |item| transactions_of(item, identifier: identifier) }
  strict ? per_item.array_intersection : per_item.array_union
end
|
165
|
+
|
166
|
+
# Return the list of items that have changed
|
167
|
+
# with at least one item from the query
|
168
|
+
# Items that were changed together with at least one query item, with the
# query items themselves removed.
#
# @param query [Array] list of items
# @return the array_union of the per-transaction remainders
def relevant_unchanged_items(query)
  relevant_indexes = transactions_of_list(query)
  remainders = relevant_indexes.map do |tx_index|
    get_tx(id: tx_index).items - query
  end
  remainders.array_union
end
|
171
|
+
|
172
|
+
# Number of transactions currently held by the store.
def size
  @txes.length
end
|
175
|
+
|
176
|
+
|
177
|
+
##
|
178
|
+
# read in a JSON file of transactions
|
179
|
+
#
|
180
|
+
# TRANSACTIONS ARE LOADED IN REVERSE ORDER!
|
181
|
+
#
|
182
|
+
# this implies that the oldest transaction gets index 0 in the txes array
|
183
|
+
# and the newest has index txes.size-1
|
184
|
+
# (given that the json file is sorted from newest/top to oldest/bottom)
|
185
|
+
#
|
186
|
+
# @param [String] path the path to the json history file
|
187
|
+
# @param [String] granularity one of 'mixed', 'file', 'method' or 'file_all'
|
188
|
+
# Reads a history file with one JSON object per line and appends a
# transaction for every usable line. Lines are processed bottom-up, so the
# last line of the file is added first.
#
# Fixes over the previous version:
# * +after+/+before+ are passed to #valid_date? in the order its signature
#   declares (they were swapped);
# * warnings report the sha of the parsed object (they indexed the raw file
#   contents);
# * nil entries in "changes" are dropped for every granularity, and
#   'file_all' no longer crashes on items its pattern cannot match.
#
# Lines that are not valid JSON, have no "date", have no usable "changes", or
# whose items are all filtered out are logged and skipped.
#
# @param path [String, nil] history file; a '.gz' extension is read through
#   Zlib. Nothing happens when nil.
# @param before [Object, nil] upper date bound handed to #valid_date?
# @param after [Object, nil] lower date bound handed to #valid_date?
# @param granularity [String] 'mixed', 'file', 'method' or 'file_all'
# @raise [ArgumentError] for an unknown granularity (raised on the first line
#   that reaches item conversion)
def load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed')
  return if path.nil?

  raw_history = read_history_file(path)

  STDERR.puts "Loading transactions using strategy: #{granularity}"
  raw_history.lines.reverse_each do |json_line|
    begin
      json_object = JSON.parse(json_line)
      next unless valid_date?(json_object, after, before)

      id = json_object["sha"]
      changes = json_object["changes"]
      unless changes
        logger.warn "#{id} did not have a \"changes\" field"
        next
      end

      present = changes.compact
      if present.empty?
        logger.warn "#{id} \"changes\" field only contained nil value(s)"
        next
      end

      tx = Evoc::Tx.new(id: id,
                        date: json_object["date"],
                        items: items_at_granularity(present, granularity))
      if tx.items.empty?
        logger.warn "#{id} with granularity #{granularity} filtered out all artifacts"
        next
      end

      self << tx
    rescue JSON::ParserError => e
      logger.warn e.message
    rescue Evoc::Exceptions::NoDateInJsonObject => e
      logger.warn e.message
    end
  end
  STDERR.puts "Loaded #{self.size} transactions from #{path}"
end

# Returns the whole history file as one String (gunzipped for '.gz' paths).
private def read_history_file(path)
  if File.extname(path) == '.gz'
    Zlib::GzipReader.open(path) { |gz| gz.read }
  else
    File.read(path, external_encoding: 'iso-8859-1', internal_encoding: 'utf-8')
  end
end

# Reduces the nil-free "changes" list of one commit to the requested granularity.
private def items_at_granularity(items, granularity)
  case granularity
  when 'mixed'
    items
  when 'file'
    # unique "<file>:" prefixes, i.e. the text up to and including the first
    # colon; items without such a prefix are dropped
    items.map { |i| /^(?<parsable_file>.+?):/.match(i).to_s }.uniq.reject(&:empty?)
  when 'method'
    # keep only items carrying a "<file>:" prefix, grouped per file in
    # first-seen order
    items.group_by { |i| /^(?<parsable_file>.+?):/.match(i).to_s }
         .reject { |prefix, _| prefix.empty? }
         .values
         .flatten
  when 'file_all'
    # unique file names (text before the first colon, or the whole item);
    # items the pattern cannot match are skipped
    items.map { |i| (m = /^(?<file>[^:]+?)(?::|\z)/.match(i)) && m[:file] }.compact.uniq
  else
    raise ArgumentError, "Granularity level must be one of 'mixed', 'file', 'method' or 'file_all', was called with #{granularity}"
  end
end
|
259
|
+
|
260
|
+
##
|
261
|
+
# a looser version of #between?
|
262
|
+
# we also allow nil comparisons
|
263
|
+
# if both <after> and <before> are nil we consider the date valid
|
264
|
+
# Checks the "date" of +json_object+ against optional bounds.
#
# * no bounds: always valid
# * both bounds: valid when date.between?(after, before) (inclusive)
# * only +after+: valid when date > after
# * only +before+: valid when date < before
#
# @raise [Evoc::Exceptions::NoDateInJsonObject] when there is no "date" value
def valid_date?(json_object, after, before)
  date = json_object["date"]
  unless date
    raise Evoc::Exceptions::NoDateInJsonObject.new, "#{json_object["sha"]} had no \"date\" field."
  end

  lower_given = !after.nil?
  upper_given = !before.nil?

  return true unless lower_given || upper_given
  return (date.between?(after, before) ? true : false) if lower_given && upper_given
  return (date > after ? true : false) if lower_given

  date < before ? true : false
end
|
286
|
+
|
287
|
+
##
|
288
|
+
# #clone_with_subset
|
289
|
+
#
|
290
|
+
# Returns a clone of <self> with transactions equal to the index range defined
|
291
|
+
# by
|
292
|
+
# from and including <start_index> to and including <stop_index>
|
293
|
+
# also exclude transactions with size larger than <max_size>
|
294
|
+
# Builds a new store from the transactions at positions
# +start_index+..+stop_index+ (inclusive), optionally dropping transactions
# whose size exceeds +max_size+. The new store shares this store's
# name <-> id dictionaries.
#
# With all three arguments nil, +self+ is returned (no copy is made). When
# only +max_size+ is given, the whole history is filtered; this case is now
# handled explicitly instead of slicing with a nil..nil range.
#
# @param start_index [Integer, nil] first position to include
# @param stop_index [Integer, nil] last position to include
# @param max_size [Integer, nil] largest tx.size to keep
# @raise [ArgumentError] if only one of the two indexes is given, or the range
#   does not select anything from the stored array
def clone_with_subset(start_index, stop_index, max_size = nil)
  return self if start_index.nil? && stop_index.nil? && max_size.nil?

  # if only one of start_index and stop_index is provided, raise exception
  if start_index.nil? ^ stop_index.nil?
    raise ArgumentError.new "You must provide both a start and end index"
  end

  selected = start_index.nil? ? self.txes : self.txes[start_index..stop_index]
  if selected.nil?
    raise ArgumentError, "#{start_index}..#{stop_index} was not a valid range on tx_store with size #{self.size}"
  end

  # the subset is only allocated once the arguments are known to be usable
  subset = TxStore.new
  subset.name_2_int = self.name_2_int
  subset.int_2_name = self.int_2_name
  selected.each do |tx|
    subset << tx if max_size.nil? || tx.size <= max_size
  end
  subset
end
|
319
|
+
|
320
|
+
##
|
321
|
+
# Return a new tx_store containing the specified tx ids
|
322
|
+
# Builds a new store holding the transactions whose tx.index is listed in
# +tx_indexes+, in the order they are stored here.
#
# Like #clone_with_subset, the new store now shares this store's
# name <-> id dictionaries. Without them the subset could not translate its
# integer items back to names.
#
# @param tx_indexes [#include?] the tx.index values to keep
# @return [TxStore]
def clone_by_indexes(tx_indexes)
  subset = TxStore.new
  subset.name_2_int = self.name_2_int
  subset.int_2_name = self.int_2_name
  self.each do |tx|
    subset << tx if tx_indexes.include?(tx.index)
  end
  subset
end
|
331
|
+
|
332
|
+
############################################
|
333
|
+
# HELPERS #
|
334
|
+
# #
|
335
|
+
# These are meant to be used by all #
|
336
|
+
# algorithm implementations if needed. #
|
337
|
+
# Having these methods here eases testing. #
|
338
|
+
############################################
|
339
|
+
|
340
|
+
|
341
|
+
|
342
|
+
|
343
|
+
##
|
344
|
+
# return a (string) json representation of the tx_store
|
345
|
+
# Serialises the store as a pretty-printed JSON array with one object per
# transaction id: {"sha", "date", "changes": {"all": [...]}}. Entries are
# ordered by descending tx.index; the index itself is left out because it may
# contain "holes" (after filtering etc). Items are written as stored.
#
# Extra arguments are accepted and ignored, so the method also works when the
# JSON generator calls it with a state object (e.g. when the store is nested
# inside another structure being serialised).
#
# @return [String]
def to_json(*_args)
  commits = {}
  self.each do |tx|
    # keyed by sha: a later tx with the same id replaces the earlier entry
    commits[tx.id] = {
      sha: tx.id,
      date: tx.date,
      index: tx.index,
      changes: { all: tx.items.map { |item| item } }
    }
  end

  newest_first = commits.values.sort_by { |commit| commit[:index] }.reverse
  JSON.pretty_generate(newest_first.map { |commit| commit.reject { |key, _| key == :index } })
end
|
359
|
+
|
360
|
+
# Writes one CSV row per transaction through the block form of CSV(),
# last-stored transaction first. Each row holds the tx's items as stored.
def pretty_print
  self.txes.reverse.each do |tx|
    CSV { |out| out << tx.items }
  end
end
|
363
|
+
|
364
|
+
private
|
365
|
+
# Parses +date+ with Time.parse.
#
# Parse failures are reported on $stderr and yield nil. The messages now
# interpolate the exception; the previous `"..." + e` raised a TypeError of
# its own inside the rescue handler, because an Exception cannot be
# concatenated to a String.
#
# @param date [String, nil]
# @return [Time, nil] nil when +date+ is nil or cannot be parsed
def parse_date(date)
  return if date.nil?

  begin
    Time.parse(date)
  rescue TypeError => e
    # something else than string was given as input
    $stderr.puts "Unable to parse #{date}, error: #{e.message}"
  rescue ArgumentError => e
    # unable to parse the string for a date
    $stderr.puts "Unable to parse #{date} for a date, error: #{e.message}"
  end
end
|
378
|
+
end
|
379
|
+
end
|
data/lib/evoc/version.rb
ADDED
data/lib/evoc.rb
ADDED
@@ -0,0 +1,198 @@
|
|
1
|
+
require_relative 'cli_helper'
|
2
|
+
|
3
|
+
module EvocCLI
  # Thor command group for inspecting a change history. Most commands build an
  # Evoc::Analyze from the parsed CLI options and delegate to it;
  # `recommendation_meta` instead post-processes recommendation JSON read
  # from ARGF.
  class Analyze < Thor
    class_option :transactions, :aliases => '-t', :type => :string, :required => false, :desc => "Path to change-history"
    class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
    class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"

    method_option :number, aliases: '-n', type: :numeric, default: 1000, desc: "The number of rules to calculate measures for"
    desc "measure_values","Empirically investigate the range of interestingness measures"
    def measure_values
      a = Evoc::Analyze.new(options)
      a.measure_values
    end

    desc "measure_ranges","Empirically investigate the range of interestingness measures"
    def measure_ranges
      a = Evoc::Analyze.new(options)
      a.measure_ranges
    end

    desc "aggregator_range","Generate example output of aggregators given a list of values to be aggregated"
    def aggregator_range
      a = Evoc::Analyze.new(options)
      a.aggregator_range
    end

    desc "uniqueness [options]","Run uniqueness analysis on TRANSACTIONS"
    long_desc <<-LONGDESC
    Run an analysises of how many unique transactions there are in TRANSACTIONS.

    The analysis is based on the order present in TRANSACTIONS. A transaction
    is considered unique if no previous transactions are a superset of the
    given transaction.
    LONGDESC
    def uniqueness
      a = Evoc::Analyze.new(options)
      $stdout.puts a.uniqueness
    end

    desc "all", "Outputs a collection of stats about the input history"
    def all
      Evoc::Analyze.new(options).all
    end

    desc "avg_changes_per_file", "Output the average number of commits that each file is present in"
    def avg_changes_per_file
      a = Evoc::Analyze.new(options)
      # first line is a header naming the statistic, second line its value
      $stdout.puts __method__
      $stdout.puts a.average_changes_per_file
    end

    desc "num_commits", "Output the number of commits in the input history"
    def num_commits
      a = Evoc::Analyze.new(options)
      $stdout.puts __method__
      $stdout.puts a.num_commits
    end

    desc "time_span [options]","Dump the time span between first and last commits in history"
    def time_span
      a = Evoc::Analyze.new(options)
      unit = 'years'
      $stdout.puts "#{__method__}_#{unit}"
      $stdout.puts a.time_span(unit: unit)
    end

    desc "avg_time_between_commits [options]","Dump the average time between each commit"
    def avg_time_between_commits
      a = Evoc::Analyze.new(options)
      unit = 'hours'
      $stdout.puts "#{__method__}_#{unit}"
      $stdout.puts a.average_time_between_commits(unit: unit)
    end

    desc "num_unique_files [options]","Dump the number of unique files mentioned in the history"
    def num_unique_files
      a = Evoc::Analyze.new(options)
      $stdout.puts __method__
      $stdout.puts a.num_unique_files
    end

    method_option :top, :aliases => '-n', :type => :numeric, default: 10, :desc => "How many files to return"
    desc "file_frequency [options]","Return the X most frequent files"
    def file_frequency
      a = Evoc::Analyze.new(options)
      a.file_frequency
    end

    method_option :group, :aliases => '-g', :type => :boolean, default: false, :desc => "If the commits should be grouped by size"
    desc "commit_size", "Dumps the commits sizes to stdout"
    def commit_size
      a = Evoc::Analyze.new(options)
      a.commit_size
    end

    desc "avg_commit_size", "Dumps the average commits size of history"
    def avg_commit_size
      a = Evoc::Analyze.new(options)
      $stdout.puts __method__
      $stdout.puts a.average_commit_size
    end

    desc "create_dict", ""
    def create_dict
      a = Evoc::Analyze.new(options)
      a.create_dict
    end

    # Reads one recommendation JSON object per line from ARGF and prints one
    # summary JSON object per line on stdout; progress goes to stderr.
    desc "recommendation_meta",""
    def recommendation_meta
      # Was `int i = 1`, which Ruby parses as a call to an undefined `int`
      # method, so the command failed before reading any input.
      i = 1
      ARGF.each_line do |line|
        hash = recommendation_summary(JSON.parse(line))
        begin
          puts hash.to_json
        rescue JSON::GeneratorError => e
          $stderr.puts "Failed to convert hash to JSON, error was #{e}\n the hash was \n#{hash}\n\n"
          next
        end
        $stderr.print "#{i} lines processed"
        i += 1
      end
    end

    private

    # Builds the summary hash for one parsed recommendation.
    #
    # The rank is derived from "average_precision1000": it is only set when
    # the rationalized value has numerator 1, in which case the rank is the
    # denominator.
    def recommendation_summary(rec)
      tx_id = rec["tx_id"]
      algorithm = rec["algorithm"]
      ap = rec["average_precision1000"]

      rank = nil
      if ap.nil?
        $stderr.puts "AP was nil for #{tx_id}"
      elsif ap > 0
        rat = ap.rationalize
        # the rank is only valid if we obtained a 1 in the numerator
        rank = rat.denominator if rat.numerator == 1
      end

      # a record without a "rules" field is treated as having no rules
      rules = rec["rules"] || []
      applicable = !rules.empty?
      positive_recommendation = !rank.nil?

      expected_outcome = nil
      confidence_correct = nil
      rank_one = nil
      confidence_incorrect = nil
      discernibility = nil

      ordered_rec = unique_strongest_consequents(rules)
      unless ordered_rec.empty?
        if positive_recommendation
          # stays nil/nil if the rank points past the end of the list
          expected_outcome, confidence_correct = ordered_rec[rank - 1]
        else
          rank_one, confidence_incorrect = ordered_rec.first
        end
        # share of distinct confidence values among the ranked consequents
        discernibility = ordered_rec.map { |(_, confidence)| confidence }.uniq.size.to_f / ordered_rec.size
      end

      { tx_id: tx_id,
        algorithm: algorithm,
        ap: ap,
        rank: rank,
        applicable: applicable,
        positive_recommendation: positive_recommendation,
        expected_outcome: expected_outcome,
        confidence_correct: confidence_correct,
        rank_one: rank_one,
        confidence_incorrect: confidence_incorrect,
        discernibility: discernibility }
    end

    # Keeps, for every consequent ("rhs"), the highest "m_confidence" found
    # among +rules+ and returns [rhs, confidence] pairs sorted by descending
    # confidence. Rules without a confidence are reported on stderr and
    # skipped.
    def unique_strongest_consequents(rules)
      selected_consequents = Hash.new { |h, k| h[k] = 0 }
      rules.each do |rule|
        lhs = rule["lhs"]
        rhs = rule["rhs"]
        confidence = rule["measures"]["m_confidence"]
        if confidence.nil?
          $stderr.puts "Confidence was nil for rule #{lhs} -> #{rhs}"
          next
        end
        value = confidence.to_r.to_f
        selected_consequents[rhs] = value if value > selected_consequents[rhs]
      end
      selected_consequents.sort_by { |_, v| -v }
    end
  end
end
|
198
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative '../evoc_helper'
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require_relative 'cli_helper'
|
2
|
+
|
3
|
+
module EvocCLI
  # Thor command group for the experiment pipeline: sample transactions,
  # generate queries from them, then execute scenarios. Every command hands
  # the parsed options to Evoc::Experiment.
  class Experiment < Thor
    class_option :case_id, type: :string, desc: "Specify case identifier."
    class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
    class_option :transactions, aliases: '-t', type: :string, required: true, desc: "Path to change-history"
    class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"

    ##
    # sample_transactions
    #
    # prints a 'tx_id' header line followed by the sampled ids
    method_option :sample_groups, aliases: "-g", type: :array, required: true, desc: "What tx size groups to sample from"
    method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
    method_option :minimum_history, aliases: '-m', type: :numeric, desc: "Filter out transactions which has less previous history than this"
    method_option :maximum_commit_size, type: :numeric, desc: "Filter out transactions which are larger than this before sampling"
    method_option :after, aliases: '-a', desc: "Only include commits after this date"
    method_option :before, aliases: '-b', desc: "Only include commits before this date"
    desc "sample_transactions [OPTIONS]", "Make a sample of transactions (from JSON format)"
    def sample_transactions
      experiment = Evoc::Experiment.new(options)
      STDOUT.puts 'tx_id'
      STDOUT.puts experiment.sample_transactions
    end

    ##
    # generate_queries
    #
    method_option :transaction_ids_path, aliases: '-i', type: :string, required: true, desc: "Path to file with \\n separated list of transaction ids"
    method_option :random_select, type: :boolean, default: false, desc: "Randomly select a query from each given transaction"
    method_option :select, aliases: '-s', type: :array, default: [],
                           desc: "Number of items to select for each query"
    method_option :reverse_select, aliases: '-r', type: :array,
                                   desc: "Reverse version of --select (select \"all but\" X)"
    method_option :percentage, aliases: '-e', type: :array,
                               desc: "Percentage of items to select for each query"
    method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
    method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
    desc "generate_queries [options]", "Generate queries from <transactions>"
    def generate_queries
      #MemoryProfiler.start('create_queries',30)
      Evoc::Experiment.new(options).generate_queries
      #MemoryProfiler.stop
    end

    ##
    # execute_scenarios
    #
    # input: csv of queries from #generate_queries
    # output: query_id, algorithm, average_precision
    method_option :algorithms, type: :array, default: ['tarmaq0','rose','co_change'], desc: "Which algorithms to use"
    method_option :measures, type: :array, default: ['support','confidence','support,confidence'], desc: "Which measures to calculate for generated rules."
    method_option :aggregators, aliases: '-a', type: :array, desc: "Which aggregators to use"
    method_option :model_size, type: :array, desc: "How many previous transactions to include. 0 = all previous transactions."
    method_option :model_age, type: :array, desc: "The number of commits between history and query."
    method_option :max_size, aliases: '-m', type: :array, desc: "Transactions of size larger than this will be excluded for rule mining"
    method_option :queries, aliases: '-q', type: :string, required: true, desc: "Path to queries"
    method_option :permutation, aliases: '-p', type: :numeric,
                                desc: "DEPRECATED WILL HAVE NO EFFECT Number of query permutations/replications to produce."
    method_option :fail_safe, type: :string, desc: "If the fail safe file exists, safely exit."
    method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
    desc "execute_scenarios [options]", ""
    def execute_scenarios
      # --permutation is still accepted, but only triggers this notice
      unless options[:permutation].nil?
        STDERR.puts "Permutation option has been set, but the option is currently disabled and will have no effect"
      end
      #MemoryProfiler.start('execute_scenarios',30)
      Evoc::Experiment.new(options).execute_scenarios
      #MemoryProfiler.stop
    end

    desc "util SUBCOMMAND [options]", "Various helper functions"
    subcommand "util", Util
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require_relative 'cli_helper'
|
2
|
+
|
3
|
+
module EvocCLI
  # Thor command group that prints what Evoc::InterestingnessMeasures reports
  # about the available measures.
  class Info < Thor
    desc "measures", "Prints the currently implemented interestingness measures"
    def measures
      # measure identifiers are printed without their "m_" marker
      names = Evoc::InterestingnessMeasures.measures.map do |measure|
        measure.to_s.sub("m_", "")
      end
      STDOUT.puts names.join(" ")
    end

    desc "measure_range", "Prints the range of available interestingness measures"
    def measure_range
      # CSV-style output: a header row, then one row per measure (sorted)
      $stdout.puts "measures,range"
      Evoc::InterestingnessMeasures.measures.sort.each do |measure|
        lower = Evoc::InterestingnessMeasures.get_min(measure)
        upper = Evoc::InterestingnessMeasures.get_max(measure)
        range = "[#{lower},#{upper}]"
        $stdout.puts "#{measure},\"#{range}\""
      end
    end
  end
end
|