evoc 3.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/Makefile +4 -0
- data/README.md +61 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/evoc +3 -0
- data/bin/setup +7 -0
- data/evoc.gemspec +30 -0
- data/lib/evoc/algorithm.rb +147 -0
- data/lib/evoc/algorithms/top_k.rb +86 -0
- data/lib/evoc/analyze.rb +395 -0
- data/lib/evoc/array.rb +43 -0
- data/lib/evoc/evaluate.rb +109 -0
- data/lib/evoc/exceptions/aggregation_error.rb +6 -0
- data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
- data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
- data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
- data/lib/evoc/exceptions/no_result.rb +6 -0
- data/lib/evoc/exceptions/non_finite.rb +8 -0
- data/lib/evoc/exceptions/non_numeric.rb +8 -0
- data/lib/evoc/exceptions/not_a_query.rb +6 -0
- data/lib/evoc/exceptions/not_a_result.rb +6 -0
- data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
- data/lib/evoc/exceptions/not_initialized.rb +6 -0
- data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
- data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
- data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
- data/lib/evoc/experiment.rb +239 -0
- data/lib/evoc/hash.rb +56 -0
- data/lib/evoc/history_store.rb +53 -0
- data/lib/evoc/hyper_rule.rb +53 -0
- data/lib/evoc/interestingness_measure.rb +77 -0
- data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
- data/lib/evoc/interestingness_measures.rb +882 -0
- data/lib/evoc/logger.rb +34 -0
- data/lib/evoc/memory_profiler.rb +43 -0
- data/lib/evoc/recommendation_cache.rb +152 -0
- data/lib/evoc/rule.rb +32 -0
- data/lib/evoc/rule_store.rb +340 -0
- data/lib/evoc/scenario.rb +303 -0
- data/lib/evoc/svd.rb +124 -0
- data/lib/evoc/tx.rb +34 -0
- data/lib/evoc/tx_store.rb +379 -0
- data/lib/evoc/version.rb +3 -0
- data/lib/evoc.rb +4 -0
- data/lib/evoc_cli/analyze.rb +198 -0
- data/lib/evoc_cli/cli_helper.rb +1 -0
- data/lib/evoc_cli/experiment.rb +78 -0
- data/lib/evoc_cli/info.rb +22 -0
- data/lib/evoc_cli/main.rb +29 -0
- data/lib/evoc_cli/util.rb +36 -0
- data/lib/evoc_helper.rb +40 -0
- data/mem_profiler/Gemfile.lock +39 -0
- data/mem_profiler/README.md +126 -0
- data/mem_profiler/createdb.rb +4 -0
- data/mem_profiler/db.rb +82 -0
- data/mem_profiler/gemfile +6 -0
- data/mem_profiler/gencsv.rb +64 -0
- data/mem_profiler/genimport.sh +8 -0
- data/mem_profiler/graph.rb +91 -0
- metadata +251 -0
@@ -0,0 +1,379 @@
|
|
1
|
+
module Evoc
  ##
  # An ordered store of change transactions (Evoc::Tx), kept oldest-first.
  #
  # Maintains:
  #  * an item -> transactions index (@items) for fast lookup
  #  * a tx.index -> array-position mapping (@tx_index_mapping)
  #  * item-name <-> integer dictionaries (@name_2_int / @int_2_name),
  #    since items are stored internally as unique integers
  class TxStore
    include Enumerable,Logging
    attr_reader :txes, :items, :tx_index_mapping
    attr_accessor :name_2_int, :int_2_name

    # CLASS VARIABLES
    # NOTE(review): a class variable is shared across all TxStore instances;
    # only the first non-nil case_id ever passed is kept.
    @@case_id = nil

    # @param path [String,nil] optional path to a JSON(.gz) history file to load
    # @param case_id [String,nil] case identifier (class-wide, first writer wins)
    # @param granularity [String] one of 'mixed','file','method','file_all'
    def initialize(path: nil, case_id: nil, granularity: 'mixed')
      @txes = []
      @items = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new
      # keeps track of each tx's location in the @txes array,
      # keyed by the tx.index variable
      @tx_index_mapping = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToInt.new : Hash.new
      # dictionaries between item names and their integer representation,
      # populated when first importing the json file
      @name_2_int = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseRubyToInt.new : Hash.new
      @int_2_name = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new
      load_transactions(path: path, granularity: granularity) unless path.nil?
      # Initialize class variables
      @@case_id ||= case_id
    end

    ##
    # self << tx
    #
    # Adds <tx> and updates @items with which items are changed in which tx.
    # Non-integer items are converted to their integer representation,
    # extending the dictionaries as needed.
    #
    # @raise [Evoc::Exceptions::NotATransaction] if tx does not respond to #items
    def << tx
      raise Evoc::Exceptions::NotATransaction.new(tx) unless tx.respond_to?(:items)
      tx.index = self.size if tx.index.nil?
      ##
      # BUILD INTEGER REPRESENTATION
      # internally, items (e.g., files/methods) are stored as unique integers,
      # with a dictionary of item -> integer mappings kept up to date
      if !tx.items.all? { |i| i.is_a?(Integer) }
        tx.items = tx.items.map do |item|
          if !self.name_2_int.key?(item)
            int = self.name_2_int.size
            self.name_2_int[item] = int
            self.int_2_name[int] = item
          end
          self.name_2_int[item]
        end
      end
      ##
      # BUILD ITEM <-> TX MAPPING
      tx.items.each do |item|
        if !@items.key?(item)
          @items[item] = [tx]
        else
          @items[item] << tx
        end
      end
      @tx_index_mapping[tx.index] = @txes.size
      @txes << tx
    end

    # Implementing #each gives us access to all Enumerable methods
    # (select, find_all etc).
    # FIX: the original raised LocalJumpError when called without a block;
    # return an Enumerator instead, per Ruby convention.
    def each(&block)
      return enum_for(:each) unless block_given?
      @txes.each(&block)
    end

    def first
      @txes.first
    end

    def last
      @txes.last
    end

    def [](index)
      @txes[index]
    end

    # Newest-first dump: one line per tx, comma-separated item names.
    def to_s
      history = ""
      self.txes.reverse_each do |tx|
        history << tx.items.map { |i| self.int_2_name[i] }.join(',') << "\n"
      end
      history
    end

    ##
    # Clear out the currently loaded transactions.
    def clear
      @txes.clear
      @items.clear
      # FIX: stale index mappings previously survived a #clear,
      # leaving @tx_index_mapping pointing into a cleared array
      @tx_index_mapping.clear
    end

    ##
    # Retrieve a transaction using the given identifier.
    #
    # @param id [Integer,Object] tx.index (for :index) or sha (for :id)
    # @param id_type [Symbol] :index or :id
    # @raise [ArgumentError] if no transaction matches
    def get_tx(id:, id_type: :index)
      tx = nil
      case id_type
      when :index
        # FIX: Fixnum is deprecated (removed in Ruby 3); use Integer
        raise ArgumentError, "Index must be an Integer, #{id} was #{id.class}" unless id.is_a?(Integer)
        if index = @tx_index_mapping[id]
          tx = @txes[index]
        end
      when :id
        tx = @txes.find { |t| t.id == id }
      end
      raise ArgumentError, "No transaction with #{id_type} #{id}" if tx.nil?
      tx
    end

    # Given an item, find those transactions where the item has been modified.
    #
    # @param item the item to check
    # @param identifier [Symbol] how to represent the found transactions,
    #   either :index or :id
    def transactions_of(item, identifier: :index)
      # if there are no transactions just return an empty list
      return [] if self.size.zero?
      # get the transactions; [] if the item cannot be found
      txes = self.items.key?(item) ? self.items[item] : []
      txes.map(&identifier)
    end

    ##
    # Returns the relevant transactions of the query:
    # all transactions where at least one item from the query was changed.
    #
    # @param items [Array] a list of items (the query)
    # @param strict [Boolean] if true, ALL query items must have changed
    #   in a transaction for it to be included
    def transactions_of_list(items, strict: false, identifier: :index)
      per_item = items.map { |item| transactions_of(item, identifier: identifier) }
      strict ? per_item.array_intersection : per_item.array_union
    end

    # Return the list of items that have changed with at least one item
    # from the query (the query items themselves excluded).
    def relevant_unchanged_items(query)
      transactions_of_list(query).map { |id| get_tx(id: id).items - query }.array_union
    end

    def size
      @txes.size
    end

    ##
    # Read in a JSON file of transactions.
    #
    # TRANSACTIONS ARE LOADED IN REVERSE ORDER!
    # This implies that the oldest transaction gets index 0 in the txes array
    # and the newest has index txes.size-1 (given that the json file is
    # sorted from newest/top to oldest/bottom).
    #
    # @param path [String] the path to the json history file (optionally .gz)
    # @param before [String,nil] only include commits dated before this
    # @param after [String,nil] only include commits dated after this
    # @param granularity [String] one of 'mixed','file','method','file_all'
    def load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed')
      return if path.nil?
      json =
        if File.extname(path) == '.gz'
          Zlib::GzipReader.open(path) { |gz| gz.read }
        else
          File.read(path, external_encoding: 'iso-8859-1', internal_encoding: 'utf-8')
        end

      STDERR.puts "Loading transactions using strategy: #{granularity}"
      json.lines.reverse_each do |json_line|
        begin
          json_object = JSON.parse(json_line)
          # FIX: arguments were passed as (before, after) to a method declared
          # as (after, before), silently inverting the date filter
          next unless valid_date?(json_object, after, before)
          id = json_object["sha"]
          date = json_object["date"]
          items = json_object["changes"]
          if items.nil?
            # FIX: these warnings interpolated `json` (the whole file string)
            # instead of `json_object`
            logger.warn "#{json_object["sha"]} did not have a \"changes\" field"
            next
          end
          if items.compact.empty?
            logger.warn "#{json_object["sha"]} \"changes\" field only contained nil value(s)"
            next
          end
          tx =
            case granularity
            when 'mixed'
              Evoc::Tx.new(id: id, date: date, items: items.compact)
            when 'file'
              # group all items by parsable files, and return only the
              # unique set of filenames
              files = items.group_by { |i| /^(?<parsable_file>.+?):/.match(i).to_s }.keys.reject(&:empty?)
              Evoc::Tx.new(id: id, date: date, items: files)
            when 'method'
              # group all items by parsable files, return only the methods and @residuals
              methods = items.group_by { |i| /^(?<parsable_file>.+?):/.match(i).to_s } # group items by parsable files
                             .select { |k, _| !k.empty? }                              # filter out the non-parsable files
                             .values                                                   # get the methods
                             .flatten                                                  # flatten the list of list of methods
              Evoc::Tx.new(id: id, date: date, items: methods)
            when 'file_all'
              files = items.group_by { |i| /^(?<file>[^:]+?)(?::|\z)/.match(i)[:file].to_s } # group items by file name
                           .keys                                                             # get the set of files
              Evoc::Tx.new(id: id, date: date, items: files)
            else
              raise ArgumentError, "Granularity level must be one of 'mixed', 'file', 'method' or 'file_all', was called with #{granularity}"
            end
          if tx.nil?
            logger.warn "#{json_object["sha"]} could not be converted to a tx"
            next
          end
          if tx.items.empty?
            logger.warn "#{json_object["sha"]} with granularity #{granularity} filtered out all artifacts"
            next
          end
          self << tx
        rescue JSON::ParserError => e
          logger.warn e.message
          next # skip to next line
        rescue Evoc::Exceptions::NoDateInJsonObject => e
          logger.warn e.message
          next
        end
      end
      STDERR.puts "Loaded #{self.size} transactions from #{path}"
    end

    ##
    # A looser version of #between? — we also allow nil comparisons.
    # If both <after> and <before> are nil we consider the date valid.
    #
    # @raise [Evoc::Exceptions::NoDateInJsonObject] if the object has no "date"
    def valid_date?(json_object, after, before)
      date = json_object["date"]
      if date.nil?
        raise Evoc::Exceptions::NoDateInJsonObject, "#{json_object["sha"]} had no \"date\" field."
      end
      # FIX: use && instead of the bitwise & for boolean logic
      if after.nil? && before.nil?
        true
      elsif !after.nil? && !before.nil?
        date.between?(after, before)
      elsif !after.nil?
        date > after
      else
        date < before
      end
    end

    ##
    # Returns a clone of <self> with transactions in the index range
    # from and including <start_index> to and including <stop_index>,
    # excluding transactions with size larger than <max_size>.
    # Returns <self> (not a clone) when all three arguments are nil.
    #
    # @raise [ArgumentError] if only one of start/stop is given,
    #   or the range is invalid
    def clone_with_subset(start_index, stop_index, max_size = nil)
      return self if start_index.nil? && stop_index.nil? && max_size.nil?
      # if only one of start_index and stop_index is provided, raise exception
      if !start_index.nil? ^ !stop_index.nil?
        raise ArgumentError, "You must provide both a start and end index"
      end
      clone = TxStore.new
      clone.name_2_int = self.name_2_int
      clone.int_2_name = self.int_2_name
      # check that it's a valid range
      if range = self.txes[start_index..stop_index]
        range = range.select { |tx| tx.size <= max_size } unless max_size.nil?
        range.each { |tx| clone << tx }
      else
        raise ArgumentError, "#{start_index}..#{stop_index} was not a valid range on tx_store with size #{self.size}"
      end
      clone
    end

    ##
    # Return a new tx_store containing the specified tx ids.
    def clone_by_indexes(tx_indexes)
      subset = TxStore.new
      self.each do |tx|
        subset << tx if tx_indexes.include?(tx.index)
      end
      subset
    end

    ############################################
    # HELPERS                                  #
    #                                          #
    # These are meant to be used by all        #
    # algorithm implementations if needed.     #
    # Having these methods here eases testing. #
    ############################################

    ##
    # Return a (string) json representation of the tx_store.
    def to_json
      commits = Hash.new { |h, k| h[k] = Hash.new(&h.default_proc) }
      self.each do |tx|
        sha = tx.id
        commits[sha][:sha] = sha
        commits[sha][:date] = tx.date
        commits[sha][:index] = tx.index
        commits[sha][:changes][:all] = []
        tx.items.each { |item| commits[sha][:changes][:all] << item }
      end
      # print the commits sorted by index (newest first), but don't include the
      # index in the json as there might be "holes" (after filtering etc)
      JSON.pretty_generate(commits.sort_by { |_, commit| commit[:index] }.reverse.map { |(_, commit)| commit.tap { |c| c.delete(:index) } })
    end

    # Dump all transactions (newest first) as CSV rows of item ids.
    def pretty_print
      self.txes.reverse_each { |tx| CSV { |row| row << tx.items } }
    end

    private

    # Parse <date> into a Time, logging (not raising) on failure.
    # Returns nil when <date> is nil or unparsable.
    def parse_date date
      return if date.nil?
      begin
        Time.parse date
      rescue TypeError => e
        # something else than string was given as input
        # FIX: `"..." + e` raised TypeError (String + Exception); interpolate
        $stderr.puts "Unable to parse #{date}, error: #{e}"
      rescue ArgumentError => e
        # unable to parse the string for a date
        $stderr.puts "Unable to parse #{date} for a date, error: #{e}"
      end
    end
  end
end
|
data/lib/evoc/version.rb
ADDED
data/lib/evoc.rb
ADDED
@@ -0,0 +1,198 @@
|
|
1
|
+
require_relative 'cli_helper'
|
2
|
+
|
3
|
+
module EvocCLI
  ##
  # Thor CLI exposing analyses over a change history (delegates to Evoc::Analyze).
  class Analyze < Thor
    class_option :transactions, :aliases => '-t', :type => :string, :required => false, :desc => "Path to change-history"
    class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
    class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"


    method_option :number, aliases: '-n', type: :numeric, default: 1000, desc: "The number of rules to calculate measures for"
    desc "measure_values","Empirically investigate the range of interestingness measures"
    def measure_values
      a = Evoc::Analyze.new(options)
      a.measure_values
    end

    desc "measure_ranges","Empirically investigate the range of interestingness measures"
    def measure_ranges
      a = Evoc::Analyze.new(options)
      a.measure_ranges
    end

    desc "aggregator_range","Generate example output of aggregators given a list of values to be aggregated"
    def aggregator_range
      a = Evoc::Analyze.new(options)
      a.aggregator_range
    end

    desc "uniqueness [options]","Run uniqueness analysis on TRANSACTIONS"
    long_desc <<-LONGDESC
    Run an analysises of how many unique transactions there are in TRANSACTIONS.

    The analysis is based on the order present in TRANSACTIONS. A transaction
    is considered unique if no previous transactions are a superset of the
    given transaction.
    LONGDESC
    def uniqueness
      a = Evoc::Analyze.new(options)
      $stdout.puts a.uniqueness
    end

    desc "all", "Outputs a collection of stats about the input history"
    def all
      Evoc::Analyze.new(options).all
    end

    desc "avg_changes_per_file", "Output the average number of commits that each file is present in"
    def avg_changes_per_file
      a = Evoc::Analyze.new(options)
      $stdout.puts __method__
      $stdout.puts a.average_changes_per_file
    end

    desc "num_commits", "Output the number of commits in the input history"
    def num_commits
      a = Evoc::Analyze.new(options)
      $stdout.puts __method__
      $stdout.puts a.num_commits
    end

    desc "time_span [options]","Dump the time span between first and last commits in history"
    def time_span
      a = Evoc::Analyze.new(options)
      unit = 'years'
      $stdout.puts "#{__method__}_#{unit}"
      $stdout.puts a.time_span(unit: unit)
    end

    desc "avg_time_between_commits [options]","Dump the average time between each commit"
    def avg_time_between_commits
      a = Evoc::Analyze.new(options)
      unit = 'hours'
      $stdout.puts "#{__method__}_#{unit}"
      $stdout.puts a.average_time_between_commits(unit: unit)
    end

    desc "num_unique_files [options]","Dump the number of unique files mentioned in the history"
    def num_unique_files
      a = Evoc::Analyze.new(options)
      $stdout.puts __method__
      $stdout.puts a.num_unique_files
    end

    method_option :top, :aliases => '-n', :type => :numeric, default: 10, :desc => "How many files to return"
    desc "file_frequency [options]","Return the X most frequent files"
    def file_frequency
      a = Evoc::Analyze.new(options)
      a.file_frequency
    end


    method_option :group, :aliases => '-g', :type => :boolean, default: false, :desc => "If the commits should be grouped by size"
    desc "commit_size", "Dumps the commits sizes to stdout"
    def commit_size
      a = Evoc::Analyze.new(options)
      a.commit_size
    end

    desc "avg_commit_size", "Dumps the average commits size of history"
    def avg_commit_size
      a = Evoc::Analyze.new(options)
      $stdout.puts __method__
      $stdout.puts a.average_commit_size
    end

    desc "create_dict", ""
    def create_dict
      a = Evoc::Analyze.new(options)
      a.create_dict
    end

    ##
    # Reads recommendation JSON objects (one per line) from ARGF and emits,
    # per recommendation, a JSON line of meta data: rank of the correct item,
    # applicability, confidence of the (in)correct top item, discernibility.
    desc "recommendation_meta",""
    def recommendation_meta
      # FIX: was `int i = 1` (Java-style declaration), which Ruby parses as a
      # call to an undefined method `int` and crashes with NoMethodError
      i = 1
      ARGF.each_line do |line|

        rec = JSON.parse(line)

        tx_id = rec["tx_id"]
        algorithm = rec["algorithm"]
        ap = rec["average_precision1000"]
        rank = nil
        if !ap.nil?
          if ap > 0
            rat = ap.rationalize
            # the rank is only valid if we obtained a 1 in the numerator
            if rat.numerator == 1
              rank = rat.denominator
            end
          end
        else
          $stderr.puts "AP was nil for #{tx_id}"
        end
        applicable = !rec["rules"].empty?
        positive_recommendation = !rank.nil?
        # find the confidence of the correct item
        expected_outcome = nil
        confidence_correct = nil
        rank_one = nil
        confidence_incorrect = nil
        discernibility = nil
        ordered_rec = unique_strongest_consequents(rec["rules"])
        if ordered_rec.size > 0
          if positive_recommendation
            expected_outcome = ordered_rec[rank-1][0]
            confidence_correct = ordered_rec[rank-1][1]
          else
            rank_one = ordered_rec[0][0]
            confidence_incorrect = ordered_rec[0][1]
          end
          # fraction of distinct confidence values among the consequents
          discernibility = (ordered_rec.group_by{|(_,confidence)| confidence}.size).to_f/ordered_rec.size
        end


        # build output hash
        hash = {tx_id: tx_id,
                algorithm: algorithm,
                ap: ap,
                rank: rank,
                applicable: applicable,
                positive_recommendation: positive_recommendation,
                expected_outcome: expected_outcome,
                confidence_correct: confidence_correct,
                rank_one: rank_one,
                confidence_incorrect: confidence_incorrect,
                discernibility: discernibility}
        begin
          puts hash.to_json
        rescue JSON::GeneratorError => e
          $stderr.puts "Failed to convert hash to JSON, error was #{e}\n the hash was \n#{hash}\n\n"
          next
        end
        # FIX: progress counter lines previously concatenated without a
        # separator; use \r so the counter overwrites itself in place
        $stderr.print "\r#{i} lines processed"
        i += 1
      end
    end

    private

    # For each distinct consequent (rhs) keep its highest confidence across all
    # rules, and return [rhs, confidence] pairs sorted by descending confidence.
    def unique_strongest_consequents(rules)
      selected_consequents = Hash.new{|h,k| h[k] = 0}
      rules.each do |rule|
        lhs = rule["lhs"]
        rhs = rule["rhs"]
        confidence = rule["measures"]["m_confidence"]
        if !confidence.nil?
          if confidence.to_r.to_f > selected_consequents[rhs]
            selected_consequents[rhs] = confidence.to_r.to_f
          end
        else
          $stderr.puts "Confidence was nil for rule #{lhs} -> #{rhs}"
        end
      end
      return selected_consequents.sort_by {|k,v| -v}
    end
  end
end
|
198
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative '../evoc_helper'
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require_relative 'cli_helper'
|
2
|
+
|
3
|
+
module EvocCLI
  ##
  # Thor CLI driving the evoc experiment pipeline: sampling transactions,
  # generating queries from them, and executing recommendation scenarios.
  class Experiment < Thor
    class_option :case_id, type: :string, desc: "Specify case identifier."
    class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
    class_option :transactions, aliases: '-t', type: :string, required: true, desc: "Path to change-history"
    class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"

    ##
    # sample_transactions
    method_option :sample_groups, aliases: "-g", type: :array, required: true, desc: "What tx size groups to sample from"
    method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
    method_option :minimum_history, aliases: '-m', type: :numeric, desc: "Filter out transactions which has less previous history than this"
    method_option :maximum_commit_size, type: :numeric, desc: "Filter out transactions which are larger than this before sampling"
    method_option :after, aliases: '-a', desc: "Only include commits after this date"
    method_option :before, aliases: '-b', desc: "Only include commits before this date"
    desc "sample_transactions [OPTIONS]","Make a sample of transactions (from JSON format)"
    def sample_transactions
      experiment = Evoc::Experiment.new(options)
      # header line followed by one sampled tx id per line
      STDOUT.puts 'tx_id'
      STDOUT.puts experiment.sample_transactions
    end

    ##
    # generate_queries
    #
    method_option :transaction_ids_path, aliases: '-i', type: :string, required: true, desc: "Path to file with \\n separated list of transaction ids"
    method_option :random_select, type: :boolean, default: false, desc: "Randomly select a query from each given transaction"
    method_option :select, aliases: '-s', type: :array, default: [],
                  desc: "Number of items to select for each query"
    method_option :reverse_select, aliases: '-r', type: :array,
                  desc: "Reverse version of --select (select \"all but\" X)"
    method_option :percentage, aliases: '-e', type: :array,
                  desc: "Percentage of items to select for each query"
    method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
    method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
    desc "generate_queries [options]", "Generate queries from <transactions>"
    def generate_queries
      #MemoryProfiler.start('create_queries',30)
      experiment = Evoc::Experiment.new(options)
      experiment.generate_queries
      #MemoryProfiler.stop
    end

    ##
    # execute_scenarios
    #
    # input: csv of queries from #create_queries
    # output: query_id, algorithm, average_precision
    method_option :algorithms, type: :array, default: ['tarmaq0','rose','co_change'], desc: "Which algorithms to use"
    method_option :measures, type: :array, default: ['support','confidence','support,confidence'], desc: "Which measures to calculate for generated rules."
    method_option :aggregators, aliases: '-a', type: :array, desc: "Which aggregators to use"
    method_option :model_size, type: :array, desc: "How many previous transactions to include. 0 = all previous transactions."
    method_option :model_age, type: :array, desc: "The number of commits between history and query."
    method_option :max_size, aliases: '-m', type: :array, desc: "Transactions of size larger than this will be excluded for rule mining"
    method_option :queries, aliases: '-q', type: :string, required: true, desc: "Path to queries"
    method_option :permutation, aliases: '-p', type: :numeric,
                  desc: "DEPRECATED WILL HAVE NO EFFECT Number of query permutations/replications to produce."
    method_option :fail_safe, type: :string, desc: "If the fail safe file exists, safely exit."
    method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
    desc "execute_scenarios [options]",""
    def execute_scenarios
      # warn users still passing the retired --permutation flag
      unless options[:permutation].nil?
        STDERR.puts "Permutation option has been set, but the option is currently disabled and will have no effect"
      end
      #MemoryProfiler.start('execute_scenarios',30)
      experiment = Evoc::Experiment.new(options)
      experiment.execute_scenarios
      #MemoryProfiler.stop
    end


    desc "util SUBCOMMAND [options]", "Various helper functions"
    subcommand "util", Util

  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require_relative 'cli_helper'
|
2
|
+
|
3
|
+
module EvocCLI
  ##
  # Thor CLI for printing information about the implemented
  # interestingness measures.
  class Info < Thor

    desc "measures","Prints the currently implemented interestingness measures"
    def measures
      # strip the internal "m_" prefix before displaying
      names = Evoc::InterestingnessMeasures.measures.map { |measure| measure.to_s.sub("m_", "") }
      STDOUT.puts names.join(" ")
    end

    desc "measure_range","Prints the range of available interestingness measures"
    def measure_range
      # CSV output: one "measure,range" line per measure, sorted by name
      $stdout.puts "measures,range"
      Evoc::InterestingnessMeasures.measures.sort.each do |measure|
        lower = Evoc::InterestingnessMeasures.get_min(measure)
        upper = Evoc::InterestingnessMeasures.get_max(measure)
        range = "[#{lower},#{upper}]"
        $stdout.puts "#{measure},\"#{range}\""
      end
    end
  end
end
|