evoc 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +4 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +21 -0
  7. data/Makefile +4 -0
  8. data/README.md +61 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/evoc +3 -0
  12. data/bin/setup +7 -0
  13. data/evoc.gemspec +30 -0
  14. data/lib/evoc/algorithm.rb +147 -0
  15. data/lib/evoc/algorithms/top_k.rb +86 -0
  16. data/lib/evoc/analyze.rb +395 -0
  17. data/lib/evoc/array.rb +43 -0
  18. data/lib/evoc/evaluate.rb +109 -0
  19. data/lib/evoc/exceptions/aggregation_error.rb +6 -0
  20. data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
  21. data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
  22. data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
  23. data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
  24. data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
  25. data/lib/evoc/exceptions/no_result.rb +6 -0
  26. data/lib/evoc/exceptions/non_finite.rb +8 -0
  27. data/lib/evoc/exceptions/non_numeric.rb +8 -0
  28. data/lib/evoc/exceptions/not_a_query.rb +6 -0
  29. data/lib/evoc/exceptions/not_a_result.rb +6 -0
  30. data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
  31. data/lib/evoc/exceptions/not_initialized.rb +6 -0
  32. data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
  33. data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
  34. data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
  35. data/lib/evoc/experiment.rb +239 -0
  36. data/lib/evoc/hash.rb +56 -0
  37. data/lib/evoc/history_store.rb +53 -0
  38. data/lib/evoc/hyper_rule.rb +53 -0
  39. data/lib/evoc/interestingness_measure.rb +77 -0
  40. data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
  41. data/lib/evoc/interestingness_measures.rb +882 -0
  42. data/lib/evoc/logger.rb +34 -0
  43. data/lib/evoc/memory_profiler.rb +43 -0
  44. data/lib/evoc/recommendation_cache.rb +152 -0
  45. data/lib/evoc/rule.rb +32 -0
  46. data/lib/evoc/rule_store.rb +340 -0
  47. data/lib/evoc/scenario.rb +303 -0
  48. data/lib/evoc/svd.rb +124 -0
  49. data/lib/evoc/tx.rb +34 -0
  50. data/lib/evoc/tx_store.rb +379 -0
  51. data/lib/evoc/version.rb +3 -0
  52. data/lib/evoc.rb +4 -0
  53. data/lib/evoc_cli/analyze.rb +198 -0
  54. data/lib/evoc_cli/cli_helper.rb +1 -0
  55. data/lib/evoc_cli/experiment.rb +78 -0
  56. data/lib/evoc_cli/info.rb +22 -0
  57. data/lib/evoc_cli/main.rb +29 -0
  58. data/lib/evoc_cli/util.rb +36 -0
  59. data/lib/evoc_helper.rb +40 -0
  60. data/mem_profiler/Gemfile.lock +39 -0
  61. data/mem_profiler/README.md +126 -0
  62. data/mem_profiler/createdb.rb +4 -0
  63. data/mem_profiler/db.rb +82 -0
  64. data/mem_profiler/gemfile +6 -0
  65. data/mem_profiler/gencsv.rb +64 -0
  66. data/mem_profiler/genimport.sh +8 -0
  67. data/mem_profiler/graph.rb +91 -0
  68. metadata +251 -0
@@ -0,0 +1,379 @@
1
+ module Evoc
2
+ class TxStore
3
+ include Enumerable,Logging
4
+ attr_reader :txes, :items, :tx_index_mapping
5
+ attr_accessor :name_2_int, :int_2_name
6
+
7
+ # CLASS VARIABLES
8
+ @@case_id = nil
9
+
10
# Sets up an empty store and, when a path is given, fills it from that file.
#
# @param path [String, nil] history file handed to #load_transactions
# @param case_id [Object, nil] kept in the class-wide @@case_id, but only
#   while that variable is still unset
# @param granularity [String] forwarded unchanged to #load_transactions
def initialize(path: nil,case_id: nil, granularity: 'mixed')
  use_google_hash = Evoc::Env::GOOGLE_HASH

  # the transactions themselves, in insertion order
  @txes = []
  # item -> transactions containing that item
  @items = use_google_hash ? GoogleHashSparseIntToRuby.new : Hash.new
  # tx.index -> position of that tx inside @txes
  @tx_index_mapping = use_google_hash ? GoogleHashSparseIntToInt.new : Hash.new
  # dictionaries between item names and their integer ids,
  # filled while transactions are added
  @name_2_int = use_google_hash ? GoogleHashSparseRubyToInt.new : Hash.new
  @int_2_name = use_google_hash ? GoogleHashSparseIntToRuby.new : Hash.new

  load_transactions(path: path, granularity: granularity) unless path.nil?

  # the first non-nil case id wins; later ones are ignored
  @@case_id ||= case_id
end
27
+
28
+
29
##
# self << tx
#
# Appends <tx> to the store:
#  * a tx without an index is given the current store size as index
#  * item names are swapped for integer ids, extending the
#    name <-> id dictionaries where needed
#  * @items is extended so each item points at the txes containing it
#  * the position of <tx> inside @txes is recorded under tx.index
#
# Raises Evoc::Exceptions::NotATransaction when <tx> has no #items.
def <<(tx)
  raise Evoc::Exceptions::NotATransaction.new(tx) unless tx.respond_to?(:items)

  tx.index = self.size if tx.index.nil?

  # translate to integer ids unless every item already is one
  unless tx.items.all? { |entry| entry.is_a?(Integer) }
    tx.items = tx.items.map do |name|
      unless self.name_2_int.key?(name)
        fresh_id = self.name_2_int.size
        self.name_2_int[name] = fresh_id
        self.int_2_name[fresh_id] = name
      end
      self.name_2_int[name]
    end
  end

  # item -> transactions lookup
  tx.items.each do |entry|
    if @items.key?(entry)
      @items[entry] << tx
    else
      @items[entry] = [tx]
    end
  end

  @tx_index_mapping[tx.index] = @txes.size
  @txes << tx
end
69
+
70
# Yields every stored transaction in storage order. Implementing #each
# is what gives the store all Enumerable methods (select, find_all etc).
#
# Without a block an Enumerator over the transactions is returned
# (previously that case ran into `yield` and raised LocalJumpError).
def each(&block)
  return to_enum(:each) { @txes.size } unless block_given?

  @txes.each(&block)
end
81
+
82
# The first stored transaction, or the first <count> transactions as an
# Array when a count is given (same contract as Array#first, so the
# Enumerable#first(n) form keeps working on a store).
def first(*count)
  @txes.first(*count)
end

# The last stored transaction, or the last <count> transactions as an
# Array when a count is given (same contract as Array#last).
def last(*count)
  @txes.last(*count)
end

# Positional access into the underlying transaction array; note that
# this is the array position, which #get_tx shows can differ from tx.index.
def [](index)
  @txes[index]
end
93
+
94
# Renders the history as text: one line per transaction, most recently
# added transaction first, item names joined by commas.
def to_s
  self.txes.reverse_each.each_with_object("") do |tx, text|
    names = tx.items.map { |item_id| self.int_2_name[item_id] }
    text << names.join(',') << "\n"
  end
end
101
+
102
##
# clear out the currently loaded transactions
#
# Also empties the tx.index -> position mapping; leaving it behind made
# #get_tx resolve old indexes to whatever was loaded afterwards.
# The name <-> id dictionaries are left untouched.
def clear
  @tx_index_mapping.clear
  @txes.clear
  @items.clear
end
108
+
109
+
110
##
# Retrieve a transaction using the given identifier
#
# @param id the identifier to look for
# @param id_type [Symbol] :index looks <id> up through the
#   tx.index -> position mapping, :id searches for a tx whose #id equals <id>
# @return the matching transaction
# @raise [ArgumentError] when <id> is not an Integer for :index lookups,
#   or when nothing matches (which includes an unknown <id_type>)
def get_tx(id:,id_type: :index)
  found =
    case id_type
    when :index
      # Integer rather than Fixnum: Fixnum no longer exists in current Rubies
      raise ArgumentError, "Index must be an Integer, #{id} was #{id.class}" unless id.is_a?(Integer)
      position = @tx_index_mapping[id]
      position ? @txes[position] : nil
    when :id
      @txes.find { |candidate| candidate.id == id }
    end

  raise ArgumentError, "No transaction with #{id_type} #{id}" if found.nil?

  found
end
130
+
131
# Look up the transactions in which <item> occurs.
# parameters:
#   item: the item to look for
#   identifier: the message sent to every found transaction to represent
#               it in the result, either :index or :id
# An empty store or an unknown item gives an empty list.
def transactions_of(item, identifier: :index)
  lookup = self.items
  matching = (!self.size.zero? && lookup.key?(item)) ? lookup[item] : []
  matching.map(&identifier)
end
148
+
149
##
# Collects the transactions of every item in <items> (see #transactions_of)
# and merges the per-item lists.
#
# parameters:
#   items: a list of items
#   (optional) strict: when true the per-item lists are merged with
#                      Array#array_intersection, otherwise with
#                      Array#array_union (both defined outside this file;
#                      presumably set intersection/union - TODO confirm)
#   (optional) identifier: passed on to #transactions_of
def transactions_of_list(items, strict: false, identifier: :index)
  per_item = items.map { |item| transactions_of(item, identifier: identifier) }
  strict ? per_item.array_intersection : per_item.array_union
end
165
+
166
# For every transaction touching at least one item of <query>, take the
# items of that transaction that are not part of the query, and merge
# those leftovers with Array#array_union.
def relevant_unchanged_items(query)
  leftovers = transactions_of_list(query).map do |tx_index|
    get_tx(id: tx_index).items - query
  end
  leftovers.array_union
end
171
+
172
# Number of transactions currently held by the store.
def size
  @txes.length
end
175
+
176
+
177
##
# read in a JSON file of transactions (one JSON object per line,
# optionally gzipped - detected through the '.gz' extension)
#
# TRANSACTIONS ARE LOADED IN REVERSE ORDER!
#
# this implies that the oldest transaction gets index 0 in the txes array
# and the newest has index txes.size-1
# (given that the json file is sorted from newest/top to oldest/bottom)
#
# Lines that are not valid JSON, have no "date", have no usable "changes"
# or end up without items for the chosen granularity are logged and skipped.
#
# @param [String] path the path to the json history file (nil = do nothing)
# @param before upper date limit handed to #valid_date? (nil = no limit)
# @param after lower date limit handed to #valid_date? (nil = no limit)
# @param [String] granularity one of 'mixed', 'file', 'method' or 'file_all'
# @raise [ArgumentError] on an unknown granularity (raised when the first
#   usable line is converted)
def load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed')
  return if path.nil?

  json =
    if File.extname(path) == '.gz'
      Zlib::GzipReader.open(path) { |gz| gz.read }
    else
      File.read(path,external_encoding: 'iso-8859-1',internal_encoding: 'utf-8')
    end

  STDERR.puts "Loading transactions using strategy: #{granularity}"
  json.lines.reverse_each do |json_line|
    begin
      json_object = JSON.parse(json_line)
      # argument order follows the signature valid_date?(json_object, after, before)
      next unless valid_date?(json_object, after, before)

      id = json_object["sha"]
      date = json_object["date"]
      changes = json_object["changes"]
      unless changes
        logger.warn "#{id} did not have a \"changes\" field"
        next
      end
      # nil entries carry no information for any granularity
      changes = changes.compact
      if changes.empty?
        logger.warn "#{id} \"changes\" field only contained nil value(s)"
        next
      end

      items =
        case granularity
        when 'mixed'
          changes
        when 'file'
          # group all items by parsable files, and return only the unique set of filenames
          changes.group_by {|i| /^(?<parsable_file>.+?):/.match(i).to_s }.keys.reject(&:empty?)
        when 'method'
          # group all items by parsable files, return only the methods and @residuals
          changes.group_by {|i| /^(?<parsable_file>.+?):/.match(i).to_s } # group items by parsable files
                 .select {|k,_| !k.empty?}                                # filter out the non-parsable files
                 .values                                                  # get the methods
                 .flatten                                                 # flatten the list of list of methods
        when 'file_all'
          # group items by file name; items without a recognisable file part are dropped
          changes.group_by {|i| (m = /^(?<file>[^:]+?)(?::|\z)/.match(i)) ? m[:file].to_s : nil }
                 .keys
                 .compact
        else
          raise ArgumentError, "Granularity level must be one of 'mixed', 'file', 'method' or 'file_all', was called with #{granularity}"
        end

      if items.empty?
        logger.warn "#{id} with granularity #{granularity} filtered out all artifacts"
        next
      end
      self << Evoc::Tx.new(id: id,date: date,items: items)
    rescue JSON::ParserError => e
      logger.warn e.message
      next # skip to next line
    rescue Evoc::Exceptions::NoDateInJsonObject => e
      logger.warn e.message
      next
    end
  end
  STDERR.puts "Loaded #{self.size} transactions from #{path}"
end
259
+
260
##
# a looser version of #between?
# nil limits are allowed and simply mean "no limit on that side":
#   * no limits at all        -> always valid
#   * both limits             -> date.between?(after, before)
#   * only <after>            -> date > after
#   * only <before>           -> date < before
#
# Raises Evoc::Exceptions::NoDateInJsonObject when the object has no "date".
def valid_date?(json_object,after,before)
  date = json_object["date"]
  unless date
    raise Evoc::Exceptions::NoDateInJsonObject.new, "#{json_object["sha"]} had no \"date\" field."
  end

  return true if after.nil? && before.nil?
  return date.between?(after, before) unless after.nil? || before.nil?
  return date > after unless after.nil?

  date < before
end
286
+
287
##
# #clone_with_subset
#
# Builds a new TxStore holding the transactions at positions
# <start_index>..<stop_index> (both inclusive) of this store, optionally
# leaving out transactions whose size exceeds <max_size>.
# The new store shares the name <-> id dictionaries with this one.
#
# When all three arguments are nil the store itself is returned.
# Raises ArgumentError when only one of the two indexes is given or when
# the range does not exist in this store.
def clone_with_subset(start_index,stop_index,max_size = nil)
  return self if [start_index, stop_index, max_size].all?(&:nil?)

  # exactly one index given
  if start_index.nil? != stop_index.nil?
    raise ArgumentError, "You must provide both a start and end index"
  end

  window = self.txes[start_index..stop_index]
  if window.nil?
    raise ArgumentError.new, "#{start_index}..#{stop_index} was not a valid range on tx_store with size #{self.size}"
  end

  subset = TxStore.new
  subset.name_2_int = self.name_2_int
  subset.int_2_name = self.int_2_name

  kept = max_size.nil? ? window : window.select { |tx| tx.size <= max_size }
  kept.each { |tx| subset << tx }
  subset
end
319
+
320
##
# Return a new tx_store containing the transactions whose tx.index is
# included in <tx_indexes>.
#
# Like #clone_with_subset, the new store shares the name <-> id
# dictionaries with this one; the copied transactions already carry
# integer items, so without the dictionaries the subset could not
# translate them back to names.
def clone_by_indexes(tx_indexes)
  subset = TxStore.new
  subset.name_2_int = self.name_2_int
  subset.int_2_name = self.int_2_name
  self.each do |tx|
    subset << tx if tx_indexes.include?(tx.index)
  end
  subset
end
331
+
332
+ ############################################
333
+ # HELPERS #
334
+ # #
335
+ # These are meant to be used by all #
336
+ # algorithm implementations if needed. #
337
+ # Having these methods here eases testing. #
338
+ ############################################
339
+
340
+
341
+
342
+
343
##
# return a (string) json representation of the tx_store
#
# The result is a pretty-printed JSON array with one object per
# transaction id, holding "sha", "date" and "changes" => {"all" => items}.
# Entries are ordered by descending tx.index; the index itself is left
# out as there might be "holes" (after filtering etc).
#
# Generator arguments are accepted and ignored, so the store can also be
# serialised through the standard to_json(state) protocol.
def to_json(*_args)
  commits = {}
  each do |tx|
    sha = tx.id
    commits[sha] = {
      sha: sha,
      date: tx.date,
      index: tx.index,
      changes: { all: tx.items.map { |item| item } }
    }
  end

  ordered = commits.values.sort_by { |commit| commit[:index] }.reverse
  JSON.pretty_generate(ordered.map { |commit| commit.reject { |key, _| key == :index } })
end
359
+
360
# Writes the items of every transaction as one CSV row (through the
# CSV() helper), starting with the most recently added transaction.
def pretty_print
  newest_first = self.txes.reverse
  newest_first.each do |tx|
    CSV { |csv| csv << tx.items }
  end
end
363
+
364
+ private
365
# Parses <date> with Time.parse.
#
# @param date [String, nil]
# @return [Time, nil] nil for a nil input, and nil (after a message on
#   $stderr) when the value cannot be parsed
def parse_date(date)
  return nil if date.nil?

  Time.parse(date)
rescue TypeError => e
  # something else than string was given as input
  # (the message is interpolated: String + Exception would itself raise)
  $stderr.puts "Unable to parse #{date}, error: #{e.message}"
  nil
rescue ArgumentError => e
  # unable to parse the string for a date
  $stderr.puts "Unable to parse #{date} for a date, error: #{e.message}"
  nil
end
378
+ end
379
+ end
@@ -0,0 +1,3 @@
1
module Evoc
  # Gem version; frozen so the shared constant cannot be modified in place.
  VERSION = "3.5.0".freeze
end
data/lib/evoc.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'evoc_helper'
2
+ # dummy module to require other gems to import evoc as: require 'evoc'
3
+ module Evoc
4
+ end
@@ -0,0 +1,198 @@
1
+ require_relative 'cli_helper'
2
+
3
+ module EvocCLI
4
+ class Analyze < Thor
5
+ class_option :transactions, :aliases => '-t', :type => :string, :required => false, :desc => "Path to change-history"
6
+ class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
7
+ class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
8
+
9
+
10
+ method_option :number, aliases: '-n', type: :numeric, default: 1000, desc: "The number of rules to calculate measures for"
11
+ desc "measure_values","Empirically investigate the range of interestingness measures"
12
# Builds an Evoc::Analyze from the CLI options and runs #measure_values on it.
def measure_values
  Evoc::Analyze.new(options).measure_values
end
16
+
17
+ desc "measure_ranges","Empirically investigate the range of interestingness measures"
18
# Builds an Evoc::Analyze from the CLI options and runs #measure_ranges on it.
def measure_ranges
  Evoc::Analyze.new(options).measure_ranges
end
22
+
23
+ desc "aggregator_range","Generate example output of aggregators given a list of values to be aggregated"
24
# Builds an Evoc::Analyze from the CLI options and runs #aggregator_range on it.
def aggregator_range
  Evoc::Analyze.new(options).aggregator_range
end
28
+
29
+ desc "uniqueness [options]","Run uniqueness analysis on TRANSACTIONS"
30
+ long_desc <<-LONGDESC
31
+ Run an analysises of how many unique transactions there are in TRANSACTIONS.
32
+
33
+ The analysis is based on the order present in TRANSACTIONS. A transaction
34
+ is considered unique if no previous transactions are a superset of the
35
+ given transaction.
36
+ LONGDESC
37
# Prints the result of Evoc::Analyze#uniqueness (built from the CLI
# options) to $stdout.
def uniqueness
  analysis = Evoc::Analyze.new(options)
  result = analysis.uniqueness
  $stdout.puts result
end
41
+
42
+ desc "all", "Outputs a collection of stats about the input history"
43
# Builds an Evoc::Analyze from the CLI options and runs #all on it.
def all
  analysis = Evoc::Analyze.new(options)
  analysis.all
end
46
+
47
+ desc "avg_changes_per_file", "Output the average number of commits that each file is present in"
48
# Prints this command's name as a header line, followed by the value of
# Evoc::Analyze#average_changes_per_file.
def avg_changes_per_file
  analysis = Evoc::Analyze.new(options)
  header = __method__
  $stdout.puts header
  $stdout.puts analysis.average_changes_per_file
end
53
+
54
+ desc "num_commits", "Output the number of commits in the input history"
55
# Prints this command's name as a header line, followed by the value of
# Evoc::Analyze#num_commits.
def num_commits
  analysis = Evoc::Analyze.new(options)
  header = __method__
  $stdout.puts header
  $stdout.puts analysis.num_commits
end
60
+
61
+ desc "time_span [options]","Dump the time span between first and last commits in history"
62
# Prints "<command name>_years" as a header line, followed by the value
# of Evoc::Analyze#time_span for the unit 'years'.
def time_span
  analysis = Evoc::Analyze.new(options)
  unit = 'years'
  header = "#{__method__}_#{unit}"
  $stdout.puts header
  $stdout.puts analysis.time_span(unit: unit)
end
68
+
69
+ desc "avg_time_between_commits [options]","Dump the average time between each commit"
70
# Prints "<command name>_hours" as a header line, followed by the value
# of Evoc::Analyze#average_time_between_commits for the unit 'hours'.
def avg_time_between_commits
  analysis = Evoc::Analyze.new(options)
  unit = 'hours'
  header = "#{__method__}_#{unit}"
  $stdout.puts header
  $stdout.puts analysis.average_time_between_commits(unit: unit)
end
76
+
77
+ desc "num_unique_files [options]","Dump the number of unique files mentioned in the history"
78
# Prints this command's name as a header line, followed by the value of
# Evoc::Analyze#num_unique_files.
def num_unique_files
  analysis = Evoc::Analyze.new(options)
  header = __method__
  $stdout.puts header
  $stdout.puts analysis.num_unique_files
end
83
+
84
+ method_option :top, :aliases => '-n', :type => :numeric, default: 10, :desc => "How many files to return"
85
+ desc "file_frequency [options]","Return the X most frequent files"
86
# Builds an Evoc::Analyze from the CLI options and runs #file_frequency on it.
def file_frequency
  Evoc::Analyze.new(options).file_frequency
end
90
+
91
+
92
+ method_option :group, :aliases => '-g', :type => :boolean, default: false, :desc => "If the commits should be grouped by size"
93
+ desc "commit_size", "Dumps the commits sizes to stdout"
94
# Builds an Evoc::Analyze from the CLI options and runs #commit_size on it.
def commit_size
  Evoc::Analyze.new(options).commit_size
end
98
+
99
+ desc "avg_commit_size", "Dumps the average commits size of history"
100
# Prints this command's name as a header line, followed by the value of
# Evoc::Analyze#average_commit_size.
def avg_commit_size
  analysis = Evoc::Analyze.new(options)
  header = __method__
  $stdout.puts header
  $stdout.puts analysis.average_commit_size
end
105
+
106
+ desc "create_dict", ""
107
# Builds an Evoc::Analyze from the CLI options and runs #create_dict on it.
def create_dict
  Evoc::Analyze.new(options).create_dict
end
111
+
112
+ desc "recommendation_meta",""
113
# Reads recommendations (one JSON object per line) from ARGF and prints
# one JSON line of meta data per recommendation to stdout:
#   tx_id, algorithm, ap            copied from the input
#   rank                            denominator of the average precision,
#                                   set only when the AP rationalizes to 1/rank
#   applicable                      whether the recommendation has any rules
#   positive_recommendation         whether a rank could be derived
#   expected_outcome/confidence_correct
#                                   the consequent at <rank> and its confidence
#   rank_one/confidence_incorrect   the top consequent when there is no rank
#   discernibility                  distinct confidences / number of consequents
# A progress counter is written to $stderr.
def recommendation_meta
  i = 1
  ARGF.each_line do |line|
    rec = JSON.parse(line)

    tx_id = rec["tx_id"]
    algorithm = rec["algorithm"]
    ap = rec["average_precision1000"]
    rank = nil
    if ap.nil?
      $stderr.puts "AP was nil for #{tx_id}"
    elsif ap > 0
      rat = ap.rationalize
      # the rank is only valid if we obtained a 1 in the numerator
      rank = rat.denominator if rat.numerator == 1
    end

    # a recommendation without a "rules" field is treated as having no rules
    rules = rec["rules"] || []
    applicable = !rules.empty?
    positive_recommendation = !rank.nil?
    # find the confidence of the correct item
    expected_outcome = nil
    confidence_correct = nil
    rank_one = nil
    confidence_incorrect = nil
    discernibility = nil
    ordered_rec = unique_strongest_consequents(rules)
    unless ordered_rec.empty?
      if positive_recommendation
        # the rank may lie beyond the recommendation list; both values
        # then stay nil instead of crashing on nil[0]
        expected_outcome, confidence_correct = ordered_rec[rank - 1]
      else
        rank_one, confidence_incorrect = ordered_rec.first
      end
      discernibility = (ordered_rec.group_by{|(_,confidence)| confidence}.size).to_f/ordered_rec.size
    end

    # build output hash
    hash = {tx_id: tx_id,
            algorithm: algorithm,
            ap: ap,
            rank: rank,
            applicable: applicable,
            positive_recommendation: positive_recommendation,
            expected_outcome: expected_outcome,
            confidence_correct: confidence_correct,
            rank_one: rank_one,
            confidence_incorrect: confidence_incorrect,
            discernibility: discernibility}
    begin
      puts hash.to_json
    rescue JSON::GeneratorError => e
      $stderr.puts "Failed to convert hash to JSON, error was #{e}\n the hash was \n#{hash}\n\n"
      next
    end
    $stderr.print "#{i} lines processed"
    i += 1
  end
end
178
+
179
+ private
180
# Reduces <rules> to one entry per consequent ("rhs"), keeping the highest
# "m_confidence" seen for it, and returns [consequent, confidence] pairs
# sorted by descending confidence. Rules without a confidence are
# reported on $stderr and otherwise ignored.
def unique_strongest_consequents(rules)
  strongest = Hash.new { |store, key| store[key] = 0 }
  rules.each do |rule|
    antecedent = rule["lhs"]
    consequent = rule["rhs"]
    raw_confidence = rule["measures"]["m_confidence"]
    if raw_confidence.nil?
      $stderr.puts "Confidence was nil for rule #{antecedent} -> #{consequent}"
      next
    end
    value = raw_confidence.to_r.to_f
    strongest[consequent] = value if value > strongest[consequent]
  end
  strongest.sort_by { |_, confidence| -confidence }
end
196
+ end
197
+ end
198
+
@@ -0,0 +1 @@
1
+ require_relative '../evoc_helper'
@@ -0,0 +1,78 @@
1
+ require_relative 'cli_helper'
2
+
3
+ module EvocCLI
4
+ class Experiment < Thor
5
+ class_option :case_id, type: :string, desc: "Specify case identifier."
6
+ class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
7
+ class_option :transactions, :aliases => '-t', :type => :string, :required => true, :desc => "Path to change-history"
8
+ class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
9
+
10
+ ##
11
+ # sample_transactions
12
+ method_option :sample_groups, aliases: "-g", type: :array, required: true, desc: "What tx size groups to sample from"
13
+ method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
14
+ method_option :minimum_history, :aliases => '-m', type: :numeric, desc: "Filter out transactions which has less previous history than this"
15
+ method_option :maximum_commit_size, type: :numeric, desc: "Filter out transactions which are larger than this before sampling"
16
+ method_option :after, :aliases => '-a', :desc => "Only include commits after this date"
17
+ method_option :before, :aliases => '-b', :desc => "Only include commits before this date"
18
+ desc "sample_transactions [OPTIONS]","Make a sample of transactions (from JSON format)"
19
# Prints the header line 'tx_id' followed by whatever
# Evoc::Experiment#sample_transactions returns for the CLI options.
def sample_transactions
  experiment = Evoc::Experiment.new(options)
  STDOUT.puts 'tx_id'
  sampled = experiment.sample_transactions
  STDOUT.puts sampled
end
24
+
25
+ ##
26
+ # generate_queries
27
+ #
28
+ method_option :transaction_ids_path, :aliases => '-i', :type => :string, :required => true, :desc => "Path to file with \\n separated list of transaction ids"
29
+ method_option :random_select, type: :boolean, default: false, desc: "Randomly select a query from each given transaction"
30
+ method_option :select, aliases: '-s', type: :array, default: [],
31
+ :desc => "Number of items to select for each query"
32
+ method_option :reverse_select, aliases: '-r', type: :array,
33
+ desc: "Reverse version of --select (select \"all but\" X)"
34
+ method_option :percentage, aliases: '-e', type: :array,
35
+ desc: "Percentage of items to select for each query"
36
+ method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
37
+ method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
38
+ desc "generate_queries [options]", "Generate queries from <transactions>"
39
# Builds an Evoc::Experiment from the CLI options and runs
# #generate_queries on it.
# (Memory profiling hooks are kept disabled:
#  MemoryProfiler.start('create_queries',30) / MemoryProfiler.stop)
def generate_queries
  Evoc::Experiment.new(options).generate_queries
end
45
+
46
+ ##
47
+ # execute_scenarios
48
+ #
49
+ # input: csv of queries from #create_queries
50
+ # output: query_id, algorithm, average_precision
51
+ method_option :algorithms, type: :array, default: ['tarmaq0','rose','co_change'], desc: "Which algorithms to use"
52
+ method_option :measures, type: :array, default: ['support','confidence','support,confidence'], desc: "Which measures to calculate for generated rules."
53
+ method_option :aggregators, aliases: '-a', type: :array, desc: "Which aggregators to use"
54
+ method_option :model_size, type: :array, desc: "How many previous transactions to include. 0 = all previous transactions."
55
+ method_option :model_age, type: :array, desc: "The number of commits between history and query."
56
+ method_option :max_size, :aliases => '-m', type: :array, desc: "Transactions of size larger than this will be excluded for rule mining"
57
+ method_option :queries, aliases: '-q', type: :string, required: true, desc: "Path to queries"
58
+ method_option :permutation, aliases: '-p', type: :numeric,
59
+ desc: "DEPRECATED WILL HAVE NO EFFECT Number of query permutations/replications to produce."
60
+ method_option :fail_safe, type: :string, desc: "If the fail safe file exists, safely exit."
61
+ method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
62
+ desc "execute_scenarios [options]",""
63
# Warns on STDERR when the disabled permutation option was set, then
# builds an Evoc::Experiment from the CLI options and runs
# #execute_scenarios on it.
# (Memory profiling hooks are kept disabled:
#  MemoryProfiler.start('execute_scenarios',30) / MemoryProfiler.stop)
def execute_scenarios
  unless options[:permutation].nil?
    STDERR.puts "Permutation option has been set, but the option is currently disabled and will have no effect"
  end
  experiment = Evoc::Experiment.new(options)
  experiment.execute_scenarios
end
72
+
73
+
74
+ desc "util SUBCOMMAND [options]", "Various helper functions"
75
+ subcommand "util", Util
76
+
77
+ end
78
+ end
@@ -0,0 +1,22 @@
1
+ require_relative 'cli_helper'
2
+
3
module EvocCLI
  # Thor commands that describe the interestingness measures known to Evoc.
  class Info < Thor

    desc "measures","Prints the currently implemented interestingness measures"
    # Prints all measure names on one line, separated by spaces and with
    # the first "m_" of each name removed.
    def measures
      names = Evoc::InterestingnessMeasures.measures.map do |measure|
        measure.to_s.sub("m_","")
      end
      STDOUT.puts names.join(" ")
    end

    desc "measure_range","Prints the range of available interestingness measures"
    # Prints a CSV header followed by one line per measure (sorted) of the
    # form: <measure>,"[<min>,<max>]"
    def measure_range
      $stdout.puts "measures,range"
      Evoc::InterestingnessMeasures.measures.sort.each do |measure|
        lower = Evoc::InterestingnessMeasures.get_min(measure)
        upper = Evoc::InterestingnessMeasures.get_max(measure)
        $stdout.puts "#{measure},\"[#{lower},#{upper}]\""
      end
    end
  end
end