vcs2json 1.0.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 13238e079776d7f77e6f301c9778f1cd93383e7c
4
- data.tar.gz: ad0e55fbe2d9b7bba56d343fb51c8e2a4d08ed6c
3
+ metadata.gz: c3dfd8c30d50ef1760dd38b36725e30be0b09d9f
4
+ data.tar.gz: 842566ac9e598784693c76cc60c8a3531ca8b218
5
5
  SHA512:
6
- metadata.gz: 77ad00f756a282c3b97fd471f1628bf78048f9fa2416ac0ae5b09e8ea430706d1edd67a97f2e19c7d6a88152446b5acde560135076d8c399b4d62386be083c95
7
- data.tar.gz: 0ecea201b2eecbeae823c4a3fb3d31e37de37a07cd5b4d602efc7bd631f33c8bccf58e18e303e372a64641c28088e57d03bac1f3fcb8f3aeaa53c50ea926564b
6
+ metadata.gz: 74e7b098604e6437ef3df2a51af00e013c84c790b47502352d2c1bb8c251892c71d22fb65035a6dbd1217e4d27499dd8abd5d2c50f4668d0fe0dfed407ff4725
7
+ data.tar.gz: d53aa615168906592464caa112cf55ac0ff8f36a0a7274c3b98317e2c898588142034564f683d0f7a0670b53c78aa947eecbe5d7ea44d8a820702a758c3204de
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
+ *.DS_Store
1
2
  /.bundle/
2
3
  /.yardoc
3
4
  /Gemfile.lock
data/lib/cli/main.rb CHANGED
@@ -4,20 +4,39 @@ module Vcs2JsonCLI
4
4
  class Main < Thor
5
5
  map %w[--version -v] => :__print_version
6
6
 
7
+ class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to log"
8
+ class_option :logger_location, type: :string, desc: "Which file to print logs to"
9
+
7
10
  desc "--version, -v", "print the version"
8
11
  def __print_version
9
12
  puts Vcs2Json::VERSION
10
13
  end
11
14
 
12
- method_option :ignore, type: :string, desc: "Specify location of .evocignore file"
13
- method_option :case_id, type: :string, desc: "Specify case identifier. Used by .evocignore etc"
14
- method_option :issue, :aliases => '-i', :type => :boolean, :default => false, :desc => "Attempt to extract issue ids from commit messages"
15
- method_option :after, :aliases => '-a', :desc => "Only include commits after this date"
16
- method_option :before, :aliases => '-b', :desc => "Only include commits before this date"
17
- method_option :number, :aliases => '-n', type: :numeric, default: 10000, :desc => "The number of commits to dump"
15
+ ##
16
+ # default thor behavior is to return exit 0 on errors (i.e., success..)
17
+ # by having exit_on_failure return true, exit(1) is returned instead
18
+ def self.exit_on_failure?
19
+ true
20
+ end
21
+
22
+ class_option :ignore, type: :string, desc: "Specify location of .evocignore file"
23
+ class_option :case_id, type: :string, desc: "Specify case identifier. Used by .evocignore etc"
24
+ class_option :issue, :aliases => '-i', :type => :boolean, :default => false, :desc => "Attempt to extract issue ids from commit messages"
25
+ class_option :after, :aliases => '-a', :desc => "Only include commits after this date"
26
+ class_option :before, :aliases => '-b', :desc => "Only include commits before this date"
27
+ class_option :number, :aliases => '-n', type: :numeric, default: 10000, :desc => "The number of commits to dump"
28
+ class_option :fine_grained, type: :boolean, default: true, desc: "Include fine grained change information in output"
29
+ class_option :ignore_comments, type: :boolean, default: false, desc: "Ignore comments when calculating diffs. Only in effect for fine grained changes."
30
+ class_option :ignore_whitespace, type: :boolean, default: false, desc: "Ignore whitespace when calculating diffs. Only in effect for fine grained changes."
31
+ class_option :residuals, type: :boolean, default: true, desc: "Consider changes that happen outside of methods"
32
+
33
+
18
34
  desc "git [options]","Make a dump of the change-history of system using git, output on stdout"
19
- def git
20
- Vcs2Json::Git.new(options).execute
21
- end
35
+ def git
36
+ Vcs2Json::Git.new(options).parse
37
+ end
38
+
39
+ # the default is to use git
40
+ default_task :git
22
41
  end
23
42
  end
@@ -0,0 +1,4 @@
module SrcML
  # Raised by SrcML.ast when a file is requested from a git revision and its
  # extension has no entry in SrcML::LANGUAGES, i.e. the language that has to
  # be passed to srcML cannot be derived from the file name.
  class UnsupportedLanguageError < StandardError
  end
end
@@ -0,0 +1,262 @@
# Wrapper for the srcML commandline interface,
# with functions specifically directed at extracting method names and calculating diffs between files.

require 'shellwords' # escaping of the revision/path that are interpolated into the bash command line

module SrcML
  extend Logging

  # Hash of supported languages.
  #
  # The file extension must map to a known format for srcML
  LANGUAGES = {
    '.java' => 'Java',
    '.C'    => 'C++',
    '.cc'   => 'C++',
    '.cpp'  => 'C++',
    '.CPP'  => 'C++',
    '.c++'  => 'C++',
    '.cp'   => 'C++',
    '.c'    => 'C'
  }.freeze

  # Check that SrcML is available.
  # The output of the probe is discarded: stdout carries the generated JSON,
  # so the version banner must not be printed there.
  if system('srcml', '--version', out: File::NULL, err: File::NULL)
    SRCML = 'srcml'
  else
    $stderr.puts "SrcML is required, please install from www.srcml.com"
    # a missing prerequisite is a failure, so do not report success (exit status 0)
    exit 1
  end

  ##
  # PUBLIC INTERFACE
  ##

  ###########
  # OPTIONS #
  ###########

  # Whether to remove comments from the source
  @@ignore_comments = false
  # Whether to remove whitespace from the source
  @@ignore_whitespace = false
  # Whether to qualify files with their full path or just their basename
  # i.e., /lib/file.a or just file.a
  @@basename_qualify = false
  # Whether to consider changes that happen outside of methods
  @@residuals = false

  # @param [Boolean] bool
  # @raise [ArgumentError] if bool is neither true nor false
  def self.ignore_comments=(bool)
    @@ignore_comments = boolean!(bool)
  end

  def self.ignore_comments?
    @@ignore_comments
  end

  # @param [Boolean] bool
  # @raise [ArgumentError] if bool is neither true nor false
  def self.ignore_whitespace=(bool)
    @@ignore_whitespace = boolean!(bool)
  end

  def self.ignore_whitespace?
    @@ignore_whitespace
  end

  # @param [Boolean] bool
  # @raise [ArgumentError] if bool is neither true nor false
  def self.basename_qualify=(bool)
    @@basename_qualify = boolean!(bool)
  end

  def self.basename_qualify?
    @@basename_qualify
  end

  # @param [Boolean] bool
  # @raise [ArgumentError] if bool is neither true nor false
  def self.residuals=(bool)
    @@residuals = boolean!(bool)
  end

  def self.residuals?
    @@residuals
  end

  ###########
  # METHODS #
  ###########

  ##
  # Calculates the AST of the given file
  #
  # @param [String] path the path to the file
  # @param [String] revision if specified, retrieves the file from the given revision
  #
  # @return [Nokogiri::XML::Document] an AST representation
  # @raise [SrcML::UnsupportedLanguageError] if a revision is given and the file extension is not in LANGUAGES
  # @raise [ArgumentError] if the srcml command exits unsuccessfully (the message is its stderr)
  def self.ast(path, revision: false)
    raw_xml =
      if revision
        language = LANGUAGES[File.extname(path)]
        unless language
          raise SrcML::UnsupportedLanguageError, "Language in the file '#{path}' not supported (guessed language from file type)"
        end
        # explicitly call bash to get support for process substitution;
        # every interpolated value is escaped so that spaces or quotes in a
        # path cannot break (or extend) the command line
        blob = Shellwords.escape("#{revision}:#{path}")
        run_srcml('bash', '-c', "#{SRCML} --language #{Shellwords.escape(language)} <(git show #{blob})")
      else
        # argument-vector form: no shell is involved, the path is passed verbatim
        run_srcml(SRCML, path)
      end

    # turn into structured xml
    xml = Nokogiri::XML(raw_xml)
    strip_comments(xml) if ignore_comments?
    strip_whitespace(xml) if ignore_whitespace?
    xml
  end

  ##
  # Returns the methods of the given file
  #
  # If the method has any parameters, the parameter types are also returned with the method name
  #
  # NOTE(review): this singleton method shadows the reflective Module#methods for SrcML.
  #
  # @param [String] path the path to the file
  # @param [String] revision if specified, retrieves the file from the given revision
  # @return [Hash[method_name => method_hash]] a hash storing the methods as keys and the hashed method as values
  def self.methods(path, revision: false)
    tree = ast(path, revision: revision)
    qualified_file = basename_qualify? ? File.basename(path) : path
    # hash each method and store in hash map with function name as key
    methods = {}

    # split file based on class declarations; no classes, just use the full ast
    partitions = tree.search("class")
    partitions = [tree] if partitions.empty?

    partitions.each do |partition|
      # if partitioned into classes, attempt to extract class name
      # (the whole-document fallback has no class name to look for)
      class_name = ''
      unless partition.document?
        if (class_node = partition.at_css("/name"))
          class_name = class_node.text
        else
          logger.debug "(#{qualified_file}) Found partitioned file but could not find classname for this partition at location \\name'. Context:\n###\n#{partition}\n###"
        end
      end

      partition.search("function").each do |function|
        name = function.at_css("/name")
        unless name
          logger.debug "(#{qualified_file}) Could not identify function name at location '\\name'. Context:\n###\n#{function}\n###"
          next
        end

        # attempt to extract parameters
        parameters = []
        if (parameter_list = function.at_css("/parameter_list"))
          parameter_list.search("parameter").each do |p|
            # most specific structure first
            parameter = p.at_css("decl type name name") || p.at_css("decl type name") || p.at_css("decl type") || p.at_css("type") || p.at_css("name")
            if parameter
              parameters << parameter.text
            else
              logger.debug "(#{qualified_file}) Function: #{name}. Nested structures: (decl type name name) or (decl type name) or (decl type) not found in parameter xml, ignoring this parameter. Context:\n###\n#{p}\n###"
            end
          end
        else
          logger.debug "(#{qualified_file}) Parameter list not found for Function: #{name}. Searched for structure '/parameter_list'. Context:\n###\n#{function}\n###"
        end

        method_name = parameters.empty? ? name.text : name.text+"("+parameters.join(',')+")"
        fully_qualified_name = class_name.empty? ? [qualified_file,method_name].join(':') : [qualified_file,class_name,method_name].join(':')
        if (block = function.at_css("block"))
          methods[fully_qualified_name] = block.content.hash
        else
          logger.debug "(#{qualified_file}) No <block> (i.e. the function content) in the function xml. Function: #{name}."
        end
      end
    end

    if residuals?
      # add residuals entry
      # i.e., what is left of the code when all methods are removed
      tree.search("function").remove
      methods[qualified_file+':'+'@residuals'] = tree.content.hash
    end

    methods
  end

  ##
  # Given two Hashes, returns all the keys that either have different values in the two hashes
  # or are not in both hashes.
  #
  # Keys of `new` that are added or changed come first (in the order of `new`),
  # followed by the keys that only exist in `old`.
  #
  # @param [Hash] old
  # @param [Hash] new
  # @return [Array<String>]
  def self.different_entries(old,new)
    added_or_changed = new.reject { |key, value| old.key?(key) && old[key] == value }.keys
    # keys that are only in old
    added_or_changed + (old.keys - new.keys)
  end

  ##
  # Calculate the changed methods between two files on disk
  #
  # @param [String] old the path to the old file
  # @param [String] new the path to the new file
  # @return [Array<String>] the changed methods
  def self.changed_methods(old,new)
    different_entries(methods(old), methods(new))
  end

  ##
  # Like #changed_methods but retrieves the file from a git revision
  # and compares it against the same path in the parent revision (revision~1)
  #
  # @param [String] path the path to the file
  # @param [String] revision the revision to retrieve the file from
  # @return [Array<String>] the changed methods
  def self.changed_methods_git(path,revision)
    methods_new = methods(path, revision: revision)
    methods_old = methods(path, revision: revision+'~1')
    different_entries(methods_old, methods_new)
  end

  # Validates an option value: returns it unchanged when it is true or false.
  def self.boolean!(value)
    raise ArgumentError, "Must be boolean" unless [true, false].include?(value)
    value
  end

  # Runs the given command (argument vector) and returns its stdout;
  # an unsuccessful exit status is reported as ArgumentError carrying stderr.
  def self.run_srcml(*command)
    stdout, stderr, status = Open3.capture3(*command)
    raise ArgumentError, stderr unless status.success?
    stdout
  end

  # Removes all comment nodes from the document (in place).
  def self.strip_comments(xml)
    xml.search('comment').each do |comment|
      # trailing newline + any number of spaces are removed from the previous node
      # this gives a more intuitive behaviour
      # i.e., the newline + spaces before the comment is considered "part of" the comment
      if (previous_node = comment.previous_sibling)
        previous_node.content = previous_node.content.gsub(/\n\s*/,"")
      end
      # now remove the comment
      comment.remove
    end
  end

  # Removes all whitespace from text nodes (in place); text nodes that
  # consist of whitespace only are dropped entirely.
  def self.strip_whitespace(xml)
    xml.search("text()").each do |node|
      if node.content =~ /\S/
        node.content = node.content.gsub(/[[:space:]]+/,"")
      else
        node.remove
      end
    end
  end

  private_class_method :boolean!, :run_srcml, :strip_comments, :strip_whitespace
end
data/lib/vcs2json/git.rb CHANGED
@@ -1,230 +1,242 @@
1
1
  require_relative '../vcs2json_helper'
2
2
 
3
3
  module Vcs2Json
4
- class Git
5
- # Generate separators between fields and commits
6
- FIELD_SEP = Digest::SHA256.hexdigest Time.new.to_s + "field_sep"
7
- COMMIT_SEP = Digest::SHA256.hexdigest Time.new.to_s + "commit_sep"
8
-
9
- def initialize(opts)
10
- @opts = opts
11
- self.ignore = @opts[:ignore]
12
- # Create a commit hash that defaults to creating new hashes given hash[:key]
13
- # so we can do 'commit[:commit][:author][:name] = .. ' without creating the :commit and :author hashes first
14
- @commits = Hash.new {|h,k| h[k] = Hash.new(&h.default_proc) }
15
- # place to stare empty commit ids if they are encountered
16
- @empty_commits = []
17
- # used to decide if we should try to search for more commits
18
- @oldest_commit_in_previous_search = Time.now
19
- end
20
-
21
- def execute
22
- # recursively add commits as long as we have less than :number and there are still more commits to search
23
- begin
24
- add_commits(@opts)
25
- add_integer_mapping
4
+ class Git
5
+ include Logging
6
+
7
+ attr_accessor :number, :fine_grained, :case_id
8
+ attr_reader :ignore
9
+
10
+ # Generate separators between fields and commits
11
+ FIELD_SEP = Digest::SHA256.hexdigest Time.new.to_s + "field_sep"
12
+ META_DATA = "%H#{FIELD_SEP}"\
13
+ "%an#{FIELD_SEP}"\
14
+ "%ae#{FIELD_SEP}"\
15
+ "%ad#{FIELD_SEP}"\
16
+ "%cn#{FIELD_SEP}"\
17
+ "%ce#{FIELD_SEP}"\
18
+ "%cd#{FIELD_SEP}"\
19
+ "%B"
20
+
21
+ def initialize(opts)
22
+ self.ignore = opts[:ignore]
23
+ self.before = opts[:before]
24
+ self.after = opts[:after]
25
+ self.number = opts[:number]
26
+ self.fine_grained = opts[:fine_grained]
27
+
28
+ # Set logger level
29
+ Logging.set_location(opts[:logger_location])
30
+ Logging.set_level(opts[:logger_level])
31
+ SrcML.ignore_comments = opts[:ignore_comments]
32
+ SrcML.ignore_whitespace = opts[:ignore_whitespace]
33
+ SrcML.residuals = opts[:residuals]
34
+ end
26
35
 
27
- # sort on date and prune excessive commits
28
- sorted_and_pruned = @commits.sort_by {|id,commit| commit[:date]}.reverse.map {|(_,commit)| commit}.first(@opts[:number])
36
+ def after=(after)
37
+ if !after.nil?
38
+ begin
39
+ Date.parse(after)
40
+ @after = after
41
+ rescue
42
+ STDERR.puts "Invalid date --after=#{after}. Ignoring option."
43
+ @after = nil
44
+ end
45
+ end
46
+ end
29
47
 
30
- # print commits to stdout as json
31
- $stdout.puts JSON.pretty_generate(sorted_and_pruned)
48
+ def after
49
+ @after.nil? ? '' : "--after=\"#{@after}\""
50
+ end
32
51
 
33
- # print ids of empty commits to stderr
34
- if !@empty_commits.empty?
35
- STDERR.puts "EMPTY COMMITS"
36
- STDERR.puts @empty_commits
37
- end
38
- # print additional info to stderr
39
- STDERR.puts "\n\nExtracted #{sorted_and_pruned.size} commits."
40
- rescue EncodingError => e
41
- puts e
42
- end
52
+ def before=(before)
53
+ if !before.nil?
54
+ begin
55
+ Date.parse(before)
56
+ @before = before
57
+ rescue
58
+ STDERR.puts "Invalid date --before=#{before}. Ignoring option."
59
+ @before = nil
43
60
  end
61
+ end
62
+ end
44
63
 
45
- def ignore
46
- @ignore
47
- end
64
+ def before
65
+ @before.nil? ? '' : "--before=\"#{@before}\""
66
+ end
48
67
 
49
- def ignore= path
50
- default_locations = ["#{Dir.pwd}/.evocignore","~/.evocignore"]
51
- paths = (path.nil? ? default_locations : [path] + default_locations)
52
- file = nil
53
- ignore = []
54
- paths.each do |p|
55
- if File.exist?(p)
56
- file = File.open(p)
57
- STDERR.puts "Loading files to ignore from #{file.path}"
58
- # return first match
59
- break
60
- end
61
- end
62
- if file.nil?
63
- STDERR.puts ".evocignore not found. Tried #{paths}. All files will be used."
64
- else
65
- if @opts[:case_id].nil?
66
- STDERR.puts "Id in .evocignore not specified, not ignoring any files."
67
- else
68
- ignore_file = YAML.load(file)
69
- if ignore_file.key?(@opts[:case_id])
70
- ignore = ignore_file[@opts[:case_id]]
71
- if !ignore.nil?
72
- STDERR.puts "Ignoring #{ignore.size} files"
73
- end
74
- else
75
- STDERR.puts "The id: '#{@opts[:case_id]}' not found in #{file.path}"
76
- end
77
- end
78
- end
79
- @ignore = (ignore.nil? ? [] : ignore)
80
- return @ignore
81
- end
82
- private
83
-
84
- def add_commits(opts)
85
- add_meta_information(opts)
86
- add_change_information(opts)
87
-
88
- if @commits.size < @opts[:number]
89
- oldest_commit_in_this_search = get_oldest_commit
90
- if oldest_commit_in_this_search != @oldest_commit_in_previous_search
91
- # we found new commits in this search but still need more
92
- @oldest_commit_in_previous_search = oldest_commit_in_this_search
93
- add_commits(before: oldest_commit_in_this_search, number: (@opts[:number] - @commits.size)*2)
94
- else
95
- STDERR.puts "\nAsked for #{@opts[:number]} commits, only found #{@commits.size} non-empty commits. Searched all the way back to #{oldest_commit_in_this_search}."
96
- end
68
+ def parse
69
+
70
+ # keeps track of number of commits successfully parsed
71
+ commit_counter = 0
72
+
73
+ # keeps track of empty commits
74
+ empty_commits = []
75
+
76
+ ##########################
77
+ # GET LIST OF COMMIT IDS #
78
+ ##########################
79
+
80
+ # getting the list of revision ids is cheap, so we get some extra in case we are unable to parse the required amount in the first 'n' commits
81
+ commit_ids = `git rev-list HEAD #{self.before} #{self.after} -n #{self.number*10} --no-merges`.split
82
+
83
+ ############################
84
+ # ITERATE OVER EACH COMMIT #
85
+ ############################
86
+
87
+ commit_ids.each do |id|
88
+ logger.debug "Parsing commit: #{id}"
89
+ # get the changed files
90
+ changed_files = `git log --pretty=format:'' --name-status #{id} -n 1`.split("\n")
91
+ # removed ignored files
92
+ changed_files.reject! {|i| self.ignore.include?(i)}
93
+ # add files changed info
94
+ if !changed_files.empty?
95
+
96
+ ##################
97
+ # FETCH METADATA #
98
+ ##################
99
+
100
+ raw_commit = `git log --pretty=format:'#{META_DATA}' #{id} -n 1`
101
+ commit = ''
102
+
103
+ ##################
104
+ # CLEAN RAW DATA #
105
+ ##################
106
+
107
+ begin
108
+ # try encoding to utf8
109
+ commit = raw_commit.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
110
+ # need to expliceitely check if the encoding is valid for ruby <= 2.0
111
+ # utf8 -> utf8 will not do anything even with invalid bytes
112
+ # http://stackoverflow.com/questions/24036821/ruby-2-0-0-stringmatch-argumenterror-invalid-byte-sequence-in-utf-8
113
+ if !commit.valid_encoding?
114
+ # encode to utf16 first and then back to utf8
115
+ commit.encode!("UTF-16be", invalid: :replace, undef: :replace, :replace=>'')
116
+ commit.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
97
117
  end
98
- end
99
-
100
- def get_oldest_commit
101
- oldest = nil
102
- if !@commits.empty?
103
- oldest = @commits.first[1][:date]
104
- @commits.each do |sha,info|
105
- if info[:date] < oldest
106
- oldest = info[:date]
107
- end
118
+ rescue ArgumentError
119
+ raise EncodingError.new, "Unable to encode input as UTF-8"
120
+ end
121
+
122
+ ##############################
123
+ # CONSTRUCT OUTPUT HASH/JSON #
124
+ ##############################
125
+
126
+ output_hash = Hash.new
127
+ fields = commit.split(FIELD_SEP)
128
+ sha = fields[0].delete("\n") #remove astray newlines
129
+ output_hash[:sha] = sha
130
+ output_hash[:name] = fields[1]
131
+ output_hash[:email] = fields[2]
132
+ output_hash[:date] = Time.parse fields[3]
133
+ output_hash[:committer_name] = fields[4]
134
+ output_hash[:committer_email]= fields[5]
135
+ output_hash[:committer_date] = Time.parse fields[6]
136
+ output_hash[:message] = fields[7]
137
+ output_hash[:changes] = []
138
+
139
+ #######################################
140
+ # PARSE FILES FOR FINEGRAINED CHANGES #
141
+ #######################################
142
+
143
+ # print progress
144
+
145
+
146
+ changed_files.each_with_index do |line,index|
147
+ STDERR.print "Parsing file #{index+1} of #{changed_files.size} in commit #{commit_counter+1} of #{self.number} \r"
148
+ if !line.empty?
149
+ file_info = line.split("\t")
150
+ file_name = file_info[1]
151
+ status = file_info[0]
152
+ # add finer grained change info
153
+ if self.fine_grained
154
+ begin
155
+ # new file, all methods are new, no need to calculate diff
156
+ if status == 'A'
157
+ SrcML.methods(file_name,revision: id).keys.each {|m| output_hash[:changes] << m}
158
+ # calculate diffs
159
+ else
160
+ SrcML.changed_methods_git(file_name,id).each {|m| output_hash[:changes] << m}
161
+ end
162
+ rescue SrcML::UnsupportedLanguageError
163
+ output_hash[:changes] << file_name
108
164
  end
165
+ else
166
+ output_hash[:changes] << file_name
167
+ end
109
168
  end
110
- return oldest
111
- end
169
+ end # changes_files.each
112
170
 
113
- def hash_2_gitoptions(opts)
114
- before = opts[:before].nil? ? '' : "--before=\"#{opts[:before]}\""
115
- after = opts[:after].nil? ? '' : "--after=\"#{opts[:after]}\""
116
- number = opts[:number].nil? ? '' : "-n #{opts[:number]}"
117
- return "#{before} #{after} #{number} --no-merges"
118
- end
171
+ # Only add commits where at least one change was detected
172
+ if !output_hash[:changes].empty?
173
+ ###########################
174
+ # PRINT COMMIT TO $stdout #
175
+ ###########################
119
176
 
120
- def add_meta_information(opts)
121
- raw_commits = `git log #{hash_2_gitoptions(opts)} --pretty=format:'%H#{FIELD_SEP}%cn#{FIELD_SEP}%ce#{FIELD_SEP}%cd#{FIELD_SEP}%ad#{FIELD_SEP}%B#{COMMIT_SEP}'`
122
-
123
- begin
124
- encoded = ''
125
- # try encoding to utf8
126
- encoded = raw_commits.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
127
- # need to expliceitely check if the encoding is valid for ruby <= 2.0
128
- # utf8 -> utf8 will not do anything even with invalid bytes
129
- # http://stackoverflow.com/questions/24036821/ruby-2-0-0-stringmatch-argumenterror-invalid-byte-sequence-in-utf-8
130
- if !encoded.valid_encoding?
131
- # encode to utf16 first and then back to utf8
132
- encoded.encode!("UTF-16be", invalid: :replace, undef: :replace, :replace=>'')
133
- encoded.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
134
- end
135
- # split into individual commits
136
- commits_info = encoded.split(COMMIT_SEP)
137
- rescue ArgumentError
138
- raise EncodingError.new, "Unable to encode input as UTF-8"
139
- end
177
+ $stdout.puts output_hash.to_json
140
178
 
141
- commits_info.each do |commit|
142
- fields = commit.split(FIELD_SEP)
143
- sha = fields[0].delete("\n") #remove astray newlines
144
- @commits[sha][:sha] = sha
145
- @commits[sha][:name] = fields[1]
146
- @commits[sha][:email] = fields[2]
147
- @commits[sha][:date] = Time.parse fields[3]
148
- @commits[sha][:author_date] = Time.parse fields[4]
149
- @commits[sha][:message] = fields[5]
150
-
151
- # attempt to parse an issue id from the commit message
152
- if @opts[:issue]
153
- @commits[commit[0]][:issue] = parse_issue(@commits[sha][:message])
154
- end
155
- end
156
- end
179
+ # increase counter for number of commits successfully parsed
180
+ commit_counter += 1
157
181
 
158
- def add_change_information(opts)
159
- commits_changes_type = `git log --pretty=format:'#{FIELD_SEP}%H' --name-status #{hash_2_gitoptions(opts)}`.split(FIELD_SEP)
160
- commits_changes_type.each do |commit|
161
- if !commit.empty?
162
- lines = commit.split("\n")
163
- sha = lines[0]
164
- @commits[sha][:changes][:all] = []
165
- if lines.size > 1
166
- lines[1..-1].each do |line|
167
- if !line.empty?
168
- file_info = line.split("\t")
169
- file_name = file_info[1]
170
- status = file_info[0]
171
- @commits[sha][:changes][:all] << file_name
172
- @commits[sha][:changes][:details][file_name][:filename] = file_name
173
- @commits[sha][:changes][:details][file_name][:status] = parse_status(status)
174
- end
175
- end
176
- end
177
- # filter out ignored files
178
- if !self.ignore.nil?
179
- @commits[sha][:changes][:all].reject! {|i| self.ignore.include?(i)}
180
- end
181
- if @commits[sha][:changes][:all].empty?
182
- @empty_commits << sha
183
- @commits.delete(sha)
184
- end
185
- end
186
- end
187
- end
182
+ ########################################
183
+ # CHECK IF REQUESTED AMOUNT IS REACHED #
184
+ ########################################
188
185
 
189
- def add_integer_mapping
190
- # create file_name -> integer mapping
191
- mapping = Hash.new
192
- index_counter = 0
193
- @commits.each do |sha,info|
194
- integer_representation = []
195
- info[:changes][:all].each do |file|
196
- if mapping[file].nil?
197
- mapping[file] = index_counter
198
- index_counter += 1
199
- end
200
- integer_representation << mapping[file]
201
- info[:changes][:details][file][:id] = mapping[file]
202
- end
203
- info[:changes][:all].clear
204
- info[:changes][:all] = integer_representation
186
+ if commit_counter == self.number
187
+ break # out of loop
205
188
  end
189
+ else # no changes detected in commit
190
+ empty_commits << id
191
+ end
192
+ else # no files in commit
193
+ empty_commits << id
206
194
  end
195
+ end
196
+
197
+ # we may still lack commits after exhaustive search, notify user
198
+ if commit_counter < self.number
199
+ STDERR.puts "Asked for #{self.number} commits, only found #{commit_counter} non-empty commits in the last #{self.number*2} commits"
200
+ end
201
+ # print ids of empty commits to stderr
202
+ if !empty_commits.empty?
203
+ STDERR.puts "EMPTY COMMITS"
204
+ STDERR.puts empty_commits
205
+ end
206
+ end
207
207
 
208
- # simply un-abbreviates the status code given by --name-status
209
- def parse_status(abbreviated_status)
210
- case abbreviated_status
211
- when "A"
212
- "added"
213
- when "M"
214
- "modified"
215
- when "D"
216
- "deleted"
217
- end
208
+ def ignore= path
209
+ default_locations = ["#{Dir.pwd}/.evocignore","~/.evocignore"]
210
+ paths = (path.nil? ? default_locations : [path] + default_locations)
211
+ file = nil
212
+ ignore = []
213
+ paths.each do |p|
214
+ if File.exist?(p)
215
+ file = File.open(p)
216
+ STDERR.puts "Loading files to ignore from #{file.path}"
217
+ # return first match
218
+ break
218
219
  end
219
-
220
-
221
- # attempts to parse an issue/bug id from the given commit message
222
- def parse_issue(message)
223
- if match = /(bug|issue) (?<id>\d+)/i.match(message)
224
- return match[:id]
225
- else
226
- return ""
220
+ end
221
+ if file.nil?
222
+ STDERR.puts ".evocignore not found. Tried #{paths}. All files will be used."
223
+ else
224
+ if self.case_id.nil?
225
+ STDERR.puts "Id in .evocignore not specified, not ignoring any files."
226
+ else
227
+ ignore_file = YAML.load(file)
228
+ if ignore_file.key?(self.case_id)
229
+ ignore = ignore_file[self.case_id]
230
+ if !ignore.nil?
231
+ STDERR.puts "Ignoring #{ignore.size} files"
227
232
  end
233
+ else
234
+ STDERR.puts "The id: '#{self.case_id}' not found in #{file.path}"
235
+ end
228
236
  end
237
+ end
238
+ @ignore = (ignore.nil? ? [] : ignore)
239
+ return @ignore
229
240
  end
241
+ end
230
242
  end
@@ -0,0 +1,43 @@
# enable logging in classes through 'include Logging'
#
# Including (or extending) objects get a `logger` method. Loggers are created
# lazily, one per class name, and cached on the Logging module itself so that
# a later Logging.set_level call can reach all of them.
module Logging
  # Severity names accepted by Logging.set_level; each one is the lower-cased
  # name of the corresponding Logger severity constant (Logger::DEBUG, ...).
  LEVELS = %w[debug info warn error fatal].freeze

  # @return [Logger] the logger registered under the receiver's class name
  def logger
    @logger ||= Logging.logger_for(self.class.name)
  end

  # Use a hash class-ivar to cache a unique Logger per class:
  @loggers = {}
  @logger_level = 'debug'
  @logger_location = 'vcs2json.log'

  class << self
    # @param [String] classname key (and progname) of the requested logger
    # @return [Logger] the cached logger for classname, created on first use
    def logger_for(classname)
      @loggers[classname] ||= configure_logger_for(classname)
    end

    # Builds a new logger that writes to the current location, using
    # Logger's 'daily' rotation setting and the currently configured level.
    #
    # @param [String] classname used as the logger's progname
    # @return [Logger]
    def configure_logger_for(classname)
      logger = Logger.new(@logger_location, 'daily')
      logger.progname = classname
      logger.level = Logger.const_get(@logger_level.upcase)
      logger
    end

    # Sets the log destination used by loggers created from now on.
    # Loggers that are already cached keep their destination.
    # NOTE(review): a nil path is stored as-is; Logger.new(nil) discards all
    # messages, so passing nil effectively turns logging off - confirm that
    # this is intended for callers that pass an unset option.
    #
    # @param [String, nil] path file to log to
    def set_location(path)
      @logger_location = path
    end

    # Changes the severity threshold of every cached logger and of all loggers
    # created later. nil and the empty string are ignored silently; any other
    # value outside LEVELS is reported on STDERR and leaves the level untouched.
    #
    # @param [String, nil] level one of LEVELS
    def set_level(level)
      return if level.nil? || level.empty?

      unless LEVELS.include?(level)
        STDERR.puts "Unable to set logger level to #{level}, possible values are #{LEVELS}. Keeping '#{@logger_level}'."
        return
      end

      STDERR.puts "Logging level has been set to '#{level}' for output to #{@logger_location}"
      # @loggers is a Hash (class name => Logger): only the values are loggers
      @loggers.each_value { |cached| cached.level = Logger.const_get(level.upcase) }
      @logger_level = level
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Vcs2Json
2
- VERSION = "1.0.1"
2
+ VERSION = "2.0.0"
3
3
  end
@@ -4,5 +4,8 @@ require 'json/pure'
4
4
  require 'time'
5
5
  require 'csv'
6
6
  require 'chronic'
7
+ require 'logger' # leveled logging
8
+ require 'nokogiri' # better/faster xml library
9
+ require 'open3' # make system calls and capture stdout/stderr/exitcodes easily
7
10
  require 'require_all'
8
11
  require_rel '/**/*.rb'
data/vcs2json.gemspec CHANGED
@@ -25,4 +25,5 @@ Gem::Specification.new do |spec|
25
25
  spec.add_runtime_dependency "require_all"
26
26
  spec.add_runtime_dependency "json_pure"
27
27
  spec.add_runtime_dependency "chronic"
28
+ spec.add_runtime_dependency "nokogiri"
28
29
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vcs2json
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Rolfsnes
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-10 00:00:00.000000000 Z
11
+ date: 2016-09-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: nokogiri
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
111
125
  description:
112
126
  email:
113
127
  - mail@thomasrolfsnes.com
@@ -136,7 +150,10 @@ files:
136
150
  - lib/exceptions/no_date_field.rb
137
151
  - lib/exceptions/no_file_section.rb
138
152
  - lib/exceptions/no_time_data_in_chafiles_field.rb
153
+ - lib/exceptions/unsupported_language.rb
154
+ - lib/srcML/srcml.rb
139
155
  - lib/vcs2json/git.rb
156
+ - lib/vcs2json/logger.rb
140
157
  - lib/vcs2json/version.rb
141
158
  - lib/vcs2json_helper.rb
142
159
  - vcs2json.gemspec