evoc 3.5.0

Files changed (68)
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +4 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +21 -0
  7. data/Makefile +4 -0
  8. data/README.md +61 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/evoc +3 -0
  12. data/bin/setup +7 -0
  13. data/evoc.gemspec +30 -0
  14. data/lib/evoc/algorithm.rb +147 -0
  15. data/lib/evoc/algorithms/top_k.rb +86 -0
  16. data/lib/evoc/analyze.rb +395 -0
  17. data/lib/evoc/array.rb +43 -0
  18. data/lib/evoc/evaluate.rb +109 -0
  19. data/lib/evoc/exceptions/aggregation_error.rb +6 -0
  20. data/lib/evoc/exceptions/expectedoutcome_nil_or_empty.rb +6 -0
  21. data/lib/evoc/exceptions/measure_calculation_error.rb +6 -0
  22. data/lib/evoc/exceptions/no_changed_items_in_changes.rb +6 -0
  23. data/lib/evoc/exceptions/no_changes_in_json_object.rb +6 -0
  24. data/lib/evoc/exceptions/no_date_in_json_object.rb +6 -0
  25. data/lib/evoc/exceptions/no_result.rb +6 -0
  26. data/lib/evoc/exceptions/non_finite.rb +8 -0
  27. data/lib/evoc/exceptions/non_numeric.rb +8 -0
  28. data/lib/evoc/exceptions/not_a_query.rb +6 -0
  29. data/lib/evoc/exceptions/not_a_result.rb +6 -0
  30. data/lib/evoc/exceptions/not_a_transaction.rb +6 -0
  31. data/lib/evoc/exceptions/not_initialized.rb +6 -0
  32. data/lib/evoc/exceptions/only_nil_in_changes.rb +6 -0
  33. data/lib/evoc/exceptions/query_nil_or_empty.rb +6 -0
  34. data/lib/evoc/exceptions/unable_to_convert_json_to_tx.rb +6 -0
  35. data/lib/evoc/experiment.rb +239 -0
  36. data/lib/evoc/hash.rb +56 -0
  37. data/lib/evoc/history_store.rb +53 -0
  38. data/lib/evoc/hyper_rule.rb +53 -0
  39. data/lib/evoc/interestingness_measure.rb +77 -0
  40. data/lib/evoc/interestingness_measure_aggregator.rb +147 -0
  41. data/lib/evoc/interestingness_measures.rb +882 -0
  42. data/lib/evoc/logger.rb +34 -0
  43. data/lib/evoc/memory_profiler.rb +43 -0
  44. data/lib/evoc/recommendation_cache.rb +152 -0
  45. data/lib/evoc/rule.rb +32 -0
  46. data/lib/evoc/rule_store.rb +340 -0
  47. data/lib/evoc/scenario.rb +303 -0
  48. data/lib/evoc/svd.rb +124 -0
  49. data/lib/evoc/tx.rb +34 -0
  50. data/lib/evoc/tx_store.rb +379 -0
  51. data/lib/evoc/version.rb +3 -0
  52. data/lib/evoc.rb +4 -0
  53. data/lib/evoc_cli/analyze.rb +198 -0
  54. data/lib/evoc_cli/cli_helper.rb +1 -0
  55. data/lib/evoc_cli/experiment.rb +78 -0
  56. data/lib/evoc_cli/info.rb +22 -0
  57. data/lib/evoc_cli/main.rb +29 -0
  58. data/lib/evoc_cli/util.rb +36 -0
  59. data/lib/evoc_helper.rb +40 -0
  60. data/mem_profiler/Gemfile.lock +39 -0
  61. data/mem_profiler/README.md +126 -0
  62. data/mem_profiler/createdb.rb +4 -0
  63. data/mem_profiler/db.rb +82 -0
  64. data/mem_profiler/gemfile +6 -0
  65. data/mem_profiler/gencsv.rb +64 -0
  66. data/mem_profiler/genimport.sh +8 -0
  67. data/mem_profiler/graph.rb +91 -0
  68. metadata +251 -0
data/lib/evoc/tx_store.rb ADDED
@@ -0,0 +1,379 @@
+ module Evoc
+   class TxStore
+     include Enumerable,Logging
+     attr_reader :txes, :items, :tx_index_mapping
+     attr_accessor :name_2_int, :int_2_name
+
+     # CLASS VARIABLES
+     @@case_id = nil
+
+     def initialize(path: nil,case_id: nil, granularity: 'mixed')
+       @txes = []
+       @items = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new
+       # keeps track of each tx's location
+       # in the @txes array using the tx.index variable
+       @tx_index_mapping = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToInt.new : Hash.new
+       # keeps a dictionary of item ids and their full filename
+       # populated when first importing the json file
+       @name_2_int = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseRubyToInt.new : Hash.new
+       @int_2_name = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new
+       if !path.nil?
+         load_transactions(path: path, granularity: granularity)
+       end
+
+       # Initialize class variables
+       @@case_id ||= case_id
+     end
+
+
+     ##
+     # self << tx
+     #
+     # adds <tx> and updates @items with which items are changed in which tx
+     def << tx
+       if tx.respond_to?(:items)
+         if tx.index.nil?
+           tx.index = self.size
+         end
+         ##
+         # BUILD INTEGER REPRESENTATION
+         # internally, items (e.g., files/methods) are stored as unique integers
+         # but a dictionary is kept updated with item -> integer mappings
+         if !tx.items.all? {|i| i.is_a?(Integer)}
+           integer_representation = []
+           tx.items.each do |item|
+             if !self.name_2_int.key?(item)
+               int = self.name_2_int.size
+               self.name_2_int[item] = int
+               self.int_2_name[int] = item
+             end
+             integer_representation << self.name_2_int[item]
+           end
+           tx.items = integer_representation
+         end
+         ##
+         # BUILD ITEM <-> TX MAPPING
+         tx.items.each do |item|
+           if !@items.key?(item)
+             @items[item] = [tx]
+           else
+             @items[item] << tx
+           end
+         end
+         @tx_index_mapping[tx.index] = @txes.size
+         @txes << tx
+       else
+         raise Evoc::Exceptions::NotATransaction.new(tx)
+       end
+     end
+
+     # implementing #each gives us access to all Enumerable methods
+     # select, find_all etc
+     def each &block
+       @txes.each do |tx|
+         if block_given?
+           block.call tx
+         else
+           yield tx
+         end
+       end
+     end
+
+     def first
+       @txes.first
+     end
+
+     def last
+       @txes.last
+     end
+
+     def [] index
+       @txes[index]
+     end
+
+     def to_s
+       history = ""
+       self.txes.reverse.each do |tx|
+         history << tx.items.map {|i| self.int_2_name[i] }.join(',') + "\n"
+       end
+       history
+     end
+
+     ##
+     # clear out the currently loaded transactions
+     def clear
+       @txes.clear
+       @items.clear
+     end
+
+
+     ##
+     # Retrieve a transaction using the given identifier
+     #
+     def get_tx(id:,id_type: :index)
+       tx = nil
+       case id_type
+       when :index
+         raise ArgumentError, "Index must be a Fixnum, #{id} was #{id.class}" unless id.is_a?(Fixnum)
+         if index = @tx_index_mapping[id]
+           tx = @txes[index]
+         end
+       when :id
+         tx = @txes.find {|tx| tx.id == id }
+       end
+       if tx.nil?
+         raise ArgumentError, "No transaction with #{id_type} #{id}"
+       else
+         return tx
+       end
+     end
+
+     # Given an item, find those transactions
+     # where the item has been modified
+     # parameters:
+     #   item: the item to check
+     #   identifier: how to represent the found transactions, either using :index or :id
+     def transactions_of(item, identifier: :index)
+       # if there are no transactions
+       # just return an empty list
+       if self.size.zero?
+         txes = []
+       else
+         # get the transactions
+         # return [] if the item cannot be found
+         txes = self.items.key?(item) ? self.items[item] : []
+       end
+       txes.map(&identifier)
+     end
+
+     ##
+     # Returns the relevant transactions of the query
+     # That is: all the transactions where at least one
+     # item from the query was changed
+     #
+     # parameters:
+     #   query: a list of items
+     #   (optional) strict: if set to true, all the items of the query have had
+     #     to be changed in the transaction for it to be included
+     def transactions_of_list(items, strict: false, identifier: :index)
+       if strict
+         items.map {|item| transactions_of(item, identifier: identifier)}.array_intersection
+       else
+         items.map {|item| transactions_of(item, identifier: identifier)}.array_union
+       end
+     end
+
+     # Return the list of items that have changed
+     # with at least one item from the query
+     def relevant_unchanged_items(query)
+       transactions_of_list(query).map {|id| get_tx(id: id).items - query}.array_union
+     end
+
+     def size
+       @txes.size
+     end
+
+
+     ##
+     # read in a JSON file of transactions
+     #
+     # TRANSACTIONS ARE LOADED IN REVERSE ORDER!
+     #
+     # this implies that the oldest transaction gets index 0 in the txes array
+     # and the newest has index txes.size-1
+     # (given that the json file is sorted from newest/top to oldest/bottom)
+     #
+     # @param [String] path the path to the json history file
+     # @param [Symbol] granularity one of :mixed,:file or :method
+     def load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed')
+       if !path.nil?
+         json = nil
+         if File.extname(path) == '.gz'
+           Zlib::GzipReader.open(path) {|gz|
+             json = gz.read
+           }
+         else
+           json = File.read(path,external_encoding: 'iso-8859-1',internal_encoding: 'utf-8')
+         end
+
+         STDERR.puts "Loading transactions using strategy: #{granularity}"
+         json.lines.reverse.each do |json_line|
+           begin
+             json_object = JSON.parse(json_line)
+             if valid_date?(json_object,after,before)
+               tx = nil
+               id = json_object["sha"]
+               date = json_object["date"]
+               if items = json_object["changes"]
+                 if !items.compact.empty?
+                   case granularity
+                   when 'mixed'
+                     tx = Evoc::Tx.new(id: id,date: date,items: items.compact)
+                   when 'file'
+                     # group all items by parsable files, and return only the unique set of filenames
+                     items = items.group_by {|i| /^(?<parsable_file>.+?):/.match(i).to_s }.keys.reject(&:empty?)
+                     tx = Evoc::Tx.new(id: id,date: date,items: items)
+                   when 'method'
+                     # group all items by parsable files, return only the methods and @residuals
+                     items = items.group_by {|i| /^(?<parsable_file>.+?):/.match(i).to_s } # group items by parsable files
+                                  .select {|k,v| !k.empty?}                                # filter out the non-parsable files
+                                  .values                                                  # get the methods
+                                  .flatten                                                 # flatten the list of list of methods
+                     tx = Evoc::Tx.new(id: id,date: date,items: items)
+                   when 'file_all'
+                     items = items.group_by {|i| /^(?<file>[^:]+?)(?::|\z)/.match(i)[:file].to_s } # group items by file name
+                                  .keys                                                            # get the set of files
+                     tx = Evoc::Tx.new(id: id,date: date,items: items)
+                   else
+                     raise ArgumentError.new, "Granularity level must be one of 'mixed', 'file', 'method' or 'file_all', was called with #{granularity}"
+                   end
+                 else
+                   logger.warn "#{json_object["sha"]} \"changes\" field only contained nil value(s)"
+                   next
+                 end
+               else
+                 logger.warn "#{json_object["sha"]} did not have a \"changes\" field"
+                 next
+               end
+               if tx.nil?
+                 logger.warn "#{json_object["sha"]} could not be converted to a tx"
+                 next
+               end
+               if tx.items.empty?
+                 logger.warn "#{json_object["sha"]} with granularity #{granularity} filtered out all artifacts"
+                 next
+               end
+               self << tx
+             end
+           rescue JSON::ParserError => e
+             logger.warn e.message
+             next # skip to next line
+           rescue Evoc::Exceptions::NoDateInJsonObject => e
+             logger.warn e.message
+             next
+           end
+         end
+         STDERR.puts "Loaded #{self.size} transactions from #{path}"
+       end
+     end
+
+     ##
+     # a looser version of #between?
+     # we also allow nil comparisons
+     # if both <after> and <before> are nil we consider the date valid
+     def valid_date?(json_object,after,before)
+       if date = json_object["date"]
+         if after.nil? & before.nil?
+           return true
+         elsif !after.nil? & !before.nil?
+           if date.between?(after, before)
+             return true
+           end
+         elsif !after.nil?
+           if date > after
+             return true
+           end
+         elsif !before.nil?
+           if date < before
+             return true
+           end
+         end
+       else
+         raise Evoc::Exceptions::NoDateInJsonObject.new, "#{json_object["sha"]} had no \"date\" field."
+       end
+       return false
+     end
+
+     ##
+     # #clone_with_subset
+     #
+     # Returns a clone of <self> with transactions equal to the index range defined
+     # by
+     # from and including <start_index> to and including <stop_index>
+     # also exclude transactions with size larger than <max_size>
+     def clone_with_subset(start_index,stop_index,max_size = nil)
+       clone = TxStore.new
+       clone.name_2_int = self.name_2_int
+       clone.int_2_name = self.int_2_name
+       if start_index.nil? & stop_index.nil? & max_size.nil? then return self end
+       # if only one of start_index and stop_index is provided, raise exception
+       if !start_index.nil? ^ !stop_index.nil?
+         raise ArgumentError.new "You must provide both a start and end index"
+       end
+       # check that it's a valid range
+       if range = self.txes[start_index..stop_index]
+         if max_size.nil?
+           range.each do |tx|
+             clone << tx
+           end
+         else
+           range.select {|tx| tx.size <= max_size}.each do |tx|
+             clone << tx
+           end
+         end
+       else
+         raise ArgumentError.new, "#{start_index}..#{stop_index} was not a valid range on tx_store with size #{self.size}"
+       end
+       clone
+     end
+
+     ##
+     # Return a new tx_store containing the specified tx ids
+     def clone_by_indexes(tx_indexes)
+       subset = TxStore.new
+       self.each do |tx|
+         if tx_indexes.include?(tx.index)
+           subset << tx
+         end
+       end
+       return subset
+     end
+
+     ############################################
+     # HELPERS                                  #
+     #                                          #
+     # These are meant to be used by all        #
+     # algorithm implementations if needed.     #
+     # Having these methods here eases testing. #
+     ############################################
+
+
+
+
+     ##
+     # return a (string) json representation of the tx_store
+     def to_json
+       commits = Hash.new {|h,k| h[k] = Hash.new(&h.default_proc) }
+       self.each do |tx|
+         sha = tx.id
+         commits[sha][:sha] = sha
+         commits[sha][:date] = tx.date
+         commits[sha][:index] = tx.index
+         commits[sha][:changes][:all] = []
+         tx.items.each {|item| commits[sha][:changes][:all] << item}
+       end
+       # print the commits sorted by index
+       # but don't include the index in the json as there might be "holes" (after filtering etc)
+       JSON.pretty_generate(commits.sort_by {|id,commit| commit[:index]}.reverse.map {|(_,commit)| commit.tap {|c| c.delete(:index)}})
+     end
+
+     def pretty_print
+       self.txes.reverse.each {|tx| CSV {|row| row << tx.items}}
+     end
+
+     private
+     def parse_date date
+       if !date.nil?
+         begin
+           Time.parse date
+         rescue TypeError => e
+           # something else than string was given as input
+           $stderr.puts "Unable to parse #{date}, error: " + e.message
+         rescue ArgumentError => e
+           # unable to parse the string for a date
+           $stderr.puts "Unable to parse #{date} for a date, error: " + e.message
+         end
+       end
+     end
+   end
+ end
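
Note: the following is a minimal usage sketch based only on the TxStore methods shown above, not part of the gem itself. The history path and file names are hypothetical, and it assumes the gem's own requires (JSON, Zlib, Evoc::Env, Evoc::Tx) are loaded via `require 'evoc'`.

```ruby
require 'evoc'

# Hypothetical change history: one JSON object per line, newest first,
# each with "sha", "date" and "changes" fields (see load_transactions above).
store = Evoc::TxStore.new(path: 'history.json', granularity: 'file')

# Items are stored internally as integers; the name_2_int / int_2_name
# dictionaries translate between file names and item ids.
item = store.name_2_int['lib/foo.rb']

# Indexes of all transactions in which 'lib/foo.rb' changed.
store.transactions_of(item)

# Transactions touching at least one (or, with strict: true, all) of the query items.
query = ['lib/foo.rb', 'lib/bar.rb'].map { |name| store.name_2_int[name] }
store.transactions_of_list(query, strict: false)

# Clone holding transactions 0..99, excluding commits with more than 30 items.
subset = store.clone_with_subset(0, 99, 30)
puts subset.to_json
```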
data/lib/evoc/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Evoc
+   VERSION = "3.5.0"
+ end
data/lib/evoc.rb ADDED
@@ -0,0 +1,4 @@
+ require 'evoc_helper'
+ # dummy module so that other gems can import evoc with: require 'evoc'
+ module Evoc
+ end
data/lib/evoc_cli/analyze.rb ADDED
@@ -0,0 +1,198 @@
+ require_relative 'cli_helper'
+
+ module EvocCLI
+   class Analyze < Thor
+     class_option :transactions, :aliases => '-t', :type => :string, :required => false, :desc => "Path to change-history"
+     class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
+     class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
+
+
+     method_option :number, aliases: '-n', type: :numeric, default: 1000, desc: "The number of rules to calculate measures for"
+     desc "measure_values","Empirically investigate the range of interestingness measures"
+     def measure_values
+       a = Evoc::Analyze.new(options)
+       a.measure_values
+     end
+
+     desc "measure_ranges","Empirically investigate the range of interestingness measures"
+     def measure_ranges
+       a = Evoc::Analyze.new(options)
+       a.measure_ranges
+     end
+
+     desc "aggregator_range","Generate example output of aggregators given a list of values to be aggregated"
+     def aggregator_range
+       a = Evoc::Analyze.new(options)
+       a.aggregator_range
+     end
+
+     desc "uniqueness [options]","Run uniqueness analysis on TRANSACTIONS"
+     long_desc <<-LONGDESC
+       Run an analysis of how many unique transactions there are in TRANSACTIONS.
+
+       The analysis is based on the order present in TRANSACTIONS. A transaction
+       is considered unique if no previous transactions are a superset of the
+       given transaction.
+     LONGDESC
+     def uniqueness
+       a = Evoc::Analyze.new(options)
+       $stdout.puts a.uniqueness
+     end
+
+     desc "all", "Outputs a collection of stats about the input history"
+     def all
+       Evoc::Analyze.new(options).all
+     end
+
+     desc "avg_changes_per_file", "Output the average number of commits that each file is present in"
+     def avg_changes_per_file
+       a = Evoc::Analyze.new(options)
+       $stdout.puts __method__
+       $stdout.puts a.average_changes_per_file
+     end
+
+     desc "num_commits", "Output the number of commits in the input history"
+     def num_commits
+       a = Evoc::Analyze.new(options)
+       $stdout.puts __method__
+       $stdout.puts a.num_commits
+     end
+
+     desc "time_span [options]","Dump the time span between first and last commits in history"
+     def time_span
+       a = Evoc::Analyze.new(options)
+       unit = 'years'
+       $stdout.puts "#{__method__}_#{unit}"
+       $stdout.puts a.time_span(unit: unit)
+     end
+
+     desc "avg_time_between_commits [options]","Dump the average time between each commit"
+     def avg_time_between_commits
+       a = Evoc::Analyze.new(options)
+       unit = 'hours'
+       $stdout.puts "#{__method__}_#{unit}"
+       $stdout.puts a.average_time_between_commits(unit: unit)
+     end
+
+     desc "num_unique_files [options]","Dump the number of unique files mentioned in the history"
+     def num_unique_files
+       a = Evoc::Analyze.new(options)
+       $stdout.puts __method__
+       $stdout.puts a.num_unique_files
+     end
+
+     method_option :top, :aliases => '-n', :type => :numeric, default: 10, :desc => "How many files to return"
+     desc "file_frequency [options]","Return the X most frequent files"
+     def file_frequency
+       a = Evoc::Analyze.new(options)
+       a.file_frequency
+     end
+
+
+     method_option :group, :aliases => '-g', :type => :boolean, default: false, :desc => "If the commits should be grouped by size"
+     desc "commit_size", "Dumps the commit sizes to stdout"
+     def commit_size
+       a = Evoc::Analyze.new(options)
+       a.commit_size
+     end
+
+     desc "avg_commit_size", "Dumps the average commit size of history"
+     def avg_commit_size
+       a = Evoc::Analyze.new(options)
+       $stdout.puts __method__
+       $stdout.puts a.average_commit_size
+     end
+
+     desc "create_dict", ""
+     def create_dict
+       a = Evoc::Analyze.new(options)
+       a.create_dict
+     end
+
+     desc "recommendation_meta",""
+     def recommendation_meta
+
+       i = 1
+       ARGF.each_line do |line|
+
+         rec = JSON.parse(line)
+
+         tx_id = rec["tx_id"]
+         algorithm = rec["algorithm"]
+         ap = rec["average_precision1000"]
+         rank = nil
+         if !ap.nil?
+           if ap > 0
+             rat = ap.rationalize
+             # the rank is only valid if we obtained a 1 in the numerator
+             if rat.numerator == 1
+               rank = rat.denominator
+             end
+           end
+         else
+           $stderr.puts "AP was nil for #{tx_id}"
+         end
+         applicable = !rec["rules"].empty?
+         positive_recommendation = !rank.nil?
+         # find the confidence of the correct item
+         expected_outcome = nil
+         confidence_correct = nil
+         rank_one = nil
+         confidence_incorrect = nil
+         discernibility = nil
+         ordered_rec = unique_strongest_consequents(rec["rules"])
+         if ordered_rec.size > 0
+           if positive_recommendation
+             expected_outcome = ordered_rec[rank-1][0]
+             confidence_correct = ordered_rec[rank-1][1]
+           else
+             rank_one = ordered_rec[0][0]
+             confidence_incorrect = ordered_rec[0][1]
+           end
+           discernibility = (ordered_rec.group_by{|(_,confidence)| confidence}.size).to_f/ordered_rec.size
+         end
+
+
+         # build output hash
+         hash = {tx_id: tx_id,
+                 algorithm: algorithm,
+                 ap: ap,
+                 rank: rank,
+                 applicable: applicable,
+                 positive_recommendation: positive_recommendation,
+                 expected_outcome: expected_outcome,
+                 confidence_correct: confidence_correct,
+                 rank_one: rank_one,
+                 confidence_incorrect: confidence_incorrect,
+                 discernibility: discernibility}
+         begin
+           puts hash.to_json
+         rescue JSON::GeneratorError => e
+           $stderr.puts "Failed to convert hash to JSON, error was #{e}\n the hash was \n#{hash}\n\n"
+           next
+         end
+         $stderr.print "#{i} lines processed"
+         i += 1
+       end
+     end
+
+     private
+     def unique_strongest_consequents(rules)
+       selected_consequents = Hash.new{|h,k| h[k] = 0}
+       rules.each do |rule|
+         lhs = rule["lhs"]
+         rhs = rule["rhs"]
+         confidence = rule["measures"]["m_confidence"]
+         if !confidence.nil?
+           if confidence.to_r.to_f > selected_consequents[rhs]
+             selected_consequents[rhs] = confidence.to_r.to_f
+           end
+         else
+           $stderr.puts "Confidence was nil for rule #{lhs} -> #{rhs}"
+         end
+       end
+       return selected_consequents.sort_by {|k,v| -v}
+     end
+   end
+ end
+
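
Note: the rank recovery in recommendation_meta above appears to lean on the fact that, when exactly one expected item is found, average precision reduces to 1/rank, so rationalizing the value and requiring a numerator of 1 recovers the rank. A small illustration (the value is made up):

```ruby
ap = 0.25                 # hypothetical average_precision1000 value
rat = ap.rationalize      # => (1/4)
rank = rat.numerator == 1 ? rat.denominator : nil  # => 4
```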
data/lib/evoc_cli/cli_helper.rb ADDED
@@ -0,0 +1 @@
+ require_relative '../evoc_helper'
data/lib/evoc_cli/experiment.rb ADDED
@@ -0,0 +1,78 @@
+ require_relative 'cli_helper'
+
+ module EvocCLI
+   class Experiment < Thor
+     class_option :case_id, type: :string, desc: "Specify case identifier."
+     class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to debug.log"
+     class_option :transactions, :aliases => '-t', :type => :string, :required => true, :desc => "Path to change-history"
+     class_option :granularity, type: :string, default: 'mixed', desc: "The granularity level of imported items from history"
+
+     ##
+     # sample_transactions
+     method_option :sample_groups, aliases: "-g", type: :array, required: true, desc: "What tx size groups to sample from"
+     method_option :sample_size, aliases: "-s", type: :numeric, required: true, desc: "Number of transactions to sample from each group"
+     method_option :minimum_history, :aliases => '-m', type: :numeric, desc: "Filter out transactions which have less previous history than this"
+     method_option :maximum_commit_size, type: :numeric, desc: "Filter out transactions which are larger than this before sampling"
+     method_option :after, :aliases => '-a', :desc => "Only include commits after this date"
+     method_option :before, :aliases => '-b', :desc => "Only include commits before this date"
+     desc "sample_transactions [OPTIONS]","Make a sample of transactions (from JSON format)"
+     def sample_transactions
+       e = Evoc::Experiment.new(options)
+       STDOUT.puts 'tx_id'
+       STDOUT.puts e.sample_transactions
+     end
+
+     ##
+     # generate_queries
+     #
+     method_option :transaction_ids_path, :aliases => '-i', :type => :string, :required => true, :desc => "Path to file with \\n separated list of transaction ids"
+     method_option :random_select, type: :boolean, default: false, desc: "Randomly select a query from each given transaction"
+     method_option :select, aliases: '-s', type: :array, default: [],
+       :desc => "Number of items to select for each query"
+     method_option :reverse_select, aliases: '-r', type: :array,
+       desc: "Reverse version of --select (select \"all but\" X)"
+     method_option :percentage, aliases: '-e', type: :array,
+       desc: "Percentage of items to select for each query"
+     method_option :filter_duplicates, aliases: '-d', type: :boolean, desc: "Remove identical queries (same id/algorithm/items/model_size/max_size)"
+     method_option :filter_expected_outcome, aliases: '-n', type: :boolean, desc: "Remove new files from the expected outcome"
+     desc "generate_queries [options]", "Generate queries from <transactions>"
+     def generate_queries
+       #MemoryProfiler.start('create_queries',30)
+       e = Evoc::Experiment.new(options)
+       e.generate_queries
+       #MemoryProfiler.stop
+     end
+
+     ##
+     # execute_scenarios
+     #
+     # input: csv of queries from #create_queries
+     # output: query_id, algorithm, average_precision
+     method_option :algorithms, type: :array, default: ['tarmaq0','rose','co_change'], desc: "Which algorithms to use"
+     method_option :measures, type: :array, default: ['support','confidence','support,confidence'], desc: "Which measures to calculate for generated rules."
+     method_option :aggregators, aliases: '-a', type: :array, desc: "Which aggregators to use"
+     method_option :model_size, type: :array, desc: "How many previous transactions to include. 0 = all previous transactions."
+     method_option :model_age, type: :array, desc: "The number of commits between history and query."
+     method_option :max_size, :aliases => '-m', type: :array, desc: "Transactions of size larger than this will be excluded for rule mining"
+     method_option :queries, aliases: '-q', type: :string, required: true, desc: "Path to queries"
+     method_option :permutation, aliases: '-p', type: :numeric,
+       desc: "DEPRECATED WILL HAVE NO EFFECT Number of query permutations/replications to produce."
+     method_option :fail_safe, type: :string, desc: "If the fail safe file exists, safely exit."
+     method_option :evaluators, aliases: '-e', type: :array, enum: ['average_precision'], required: false, desc: "Methods for evaluating the recommendations"
+     desc "execute_scenarios [options]",""
+     def execute_scenarios
+       if !options[:permutation].nil?
+         STDERR.puts "Permutation option has been set, but the option is currently disabled and will have no effect"
+       end
+       #MemoryProfiler.start('execute_scenarios',30)
+       e = Evoc::Experiment.new(options)
+       e.execute_scenarios
+       #MemoryProfiler.stop
+     end
+
+
+     desc "util SUBCOMMAND [options]", "Various helper functions"
+     subcommand "util", Util
+
+   end
+ end
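
Note: these Thor classes could also be driven programmatically via Thor's standard .start interface; a sketch follows. The file names and option values are hypothetical, and it assumes the EvocCLI classes (including Util) are loaded, e.g. the way the gem's own bin/evoc and main.rb wire them up (not shown here).

```ruby
require 'evoc'

# Sample 10 transactions from each of the commit-size groups 2 and 4.
EvocCLI::Experiment.start(%w[sample_transactions -t history.json -g 2 4 -s 10])

# Generate queries for the sampled transaction ids, then run the scenarios.
EvocCLI::Experiment.start(%w[generate_queries -t history.json -i sample.csv -s 1 2])
EvocCLI::Experiment.start(%w[execute_scenarios -t history.json -q queries.csv])
```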
data/lib/evoc_cli/info.rb ADDED
@@ -0,0 +1,22 @@
+ require_relative 'cli_helper'
+
+ module EvocCLI
+   class Info < Thor
+
+     desc "measures","Prints the currently implemented interestingness measures"
+     def measures
+       STDOUT.puts Evoc::InterestingnessMeasures.measures.map {|m| m.to_s.sub("m_","")}.join(" ")
+     end
+
+     desc "measure_range","Prints the range of available interestingness measures"
+     def measure_range
+       $stdout.puts "measures,range"
+       Evoc::InterestingnessMeasures.measures.sort.each do |m|
+         min = Evoc::InterestingnessMeasures.get_min(m)
+         max = Evoc::InterestingnessMeasures.get_max(m)
+         range = "[#{min},#{max}]"
+         $stdout.puts "#{m},\"#{range}\""
+       end
+     end
+   end
+ end