vcs2json 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 13238e079776d7f77e6f301c9778f1cd93383e7c
4
- data.tar.gz: ad0e55fbe2d9b7bba56d343fb51c8e2a4d08ed6c
3
+ metadata.gz: c3dfd8c30d50ef1760dd38b36725e30be0b09d9f
4
+ data.tar.gz: 842566ac9e598784693c76cc60c8a3531ca8b218
5
5
  SHA512:
6
- metadata.gz: 77ad00f756a282c3b97fd471f1628bf78048f9fa2416ac0ae5b09e8ea430706d1edd67a97f2e19c7d6a88152446b5acde560135076d8c399b4d62386be083c95
7
- data.tar.gz: 0ecea201b2eecbeae823c4a3fb3d31e37de37a07cd5b4d602efc7bd631f33c8bccf58e18e303e372a64641c28088e57d03bac1f3fcb8f3aeaa53c50ea926564b
6
+ metadata.gz: 74e7b098604e6437ef3df2a51af00e013c84c790b47502352d2c1bb8c251892c71d22fb65035a6dbd1217e4d27499dd8abd5d2c50f4668d0fe0dfed407ff4725
7
+ data.tar.gz: d53aa615168906592464caa112cf55ac0ff8f36a0a7274c3b98317e2c898588142034564f683d0f7a0670b53c78aa947eecbe5d7ea44d8a820702a758c3204de
data/.gitignore CHANGED
@@ -1,3 +1,4 @@
1
+ *.DS_Store
1
2
  /.bundle/
2
3
  /.yardoc
3
4
  /Gemfile.lock
data/lib/cli/main.rb CHANGED
@@ -4,20 +4,39 @@ module Vcs2JsonCLI
4
4
# Command line entry point: declares the options shared by every task and
# the tasks themselves (`git`, which is also the default, and the version flag).
class Main < Thor
  map %w[--version -v] => :__print_version

  # logging options
  class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to log"
  class_option :logger_location, type: :string, desc: "Which file to print logs to"

  desc "--version, -v", "print the version"
  def __print_version
    puts Vcs2Json::VERSION
  end

  ##
  # Thor reports success (exit 0) on errors unless told otherwise;
  # answering true here makes failures exit with status 1.
  def self.exit_on_failure?
    true
  end

  # options controlling which commits are dumped and how they are analysed
  class_option :ignore, type: :string, desc: "Specify location of .evocignore file"
  class_option :case_id, type: :string, desc: "Specify case identifier. Used by .evocignore etc"
  class_option :issue, aliases: '-i', type: :boolean, default: false, desc: "Attempt to extract issue ids from commit messages"
  class_option :after, aliases: '-a', desc: "Only include commits after this date"
  class_option :before, aliases: '-b', desc: "Only include commits before this date"
  class_option :number, aliases: '-n', type: :numeric, default: 10000, desc: "The number of commits to dump"
  class_option :fine_grained, type: :boolean, default: true, desc: "Include fine grained change information in output"
  class_option :ignore_comments, type: :boolean, default: false, desc: "Ignore comments when calculating diffs. Only in effect for fine grained changes."
  class_option :ignore_whitespace, type: :boolean, default: false, desc: "Ignore whitespace when calculating diffs. Only in effect for fine grained changes."
  class_option :residuals, type: :boolean, default: true, desc: "Consider changes that happen outside of methods"

  desc "git [options]","Make a dump of the change-history of system using git, output on stdout"
  def git
    dumper = Vcs2Json::Git.new(options)
    dumper.parse
  end

  # running the executable without naming a task dumps the git history
  default_task :git
end
23
42
  end
@@ -0,0 +1,4 @@
1
module SrcML
  # Raised when a file's extension does not map to a language known to srcML.
  UnsupportedLanguageError = Class.new(StandardError)
end
@@ -0,0 +1,262 @@
1
+ # Wrapper for the srcML commandline interface,
2
+ # with functions specifically directed at extracting method names and calculating diffs between files.
3
+
4
+ module SrcML
5
+ extend Logging
6
+
7
# Hash of supported languages, keyed by file extension (case sensitive).
#
# Every value must be a language name that srcML accepts for its
# --language option. Frozen so the shared table cannot be altered at runtime.
LANGUAGES = {
  '.java' => 'Java',
  '.C'    => 'C++',
  '.cc'   => 'C++',
  '.cpp'  => 'C++',
  '.CPP'  => 'C++',
  '.c++'  => 'C++',
  '.cp'   => 'C++',
  '.c'    => 'C'
}.freeze
18
+
19
# Check that the srcml executable is available when this file is loaded.
#
# The probe's own output is discarded: stdout is reserved for the JSON dump,
# so the version banner must not end up there.
if system("srcml", "--version", out: File::NULL, err: File::NULL)
  SRCML = "srcml"
else
  $stderr.puts "SrcML is required, please install from www.srcml.org"
  # a missing dependency is a failure, so do not exit with status 0
  exit 1
end
26
+
27
+ ##
28
+ # PUBLIC INTERFACE
29
+ ##
30
+
31
+ ###########
32
+ # OPTIONS #
33
+ ###########
34
+
35
# Strip comments from the source before hashing
@@ignore_comments = false
# Strip whitespace from the source before hashing
@@ignore_whitespace = false
# Qualify entries with just the file's basename (file.a)
# rather than the path that was given (/lib/file.a)
@@basename_qualify = false
# Also report changes that happen outside of methods
@@residuals = false

# @param [Boolean] bool whether comments should be ignored
# @raise [ArgumentError] unless given true or false
def self.ignore_comments=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)
  @@ignore_comments = bool
end

# @return [Boolean] whether comments are ignored
def self.ignore_comments?
  @@ignore_comments
end

# @param [Boolean] bool whether whitespace should be ignored
# @raise [ArgumentError] unless given true or false
def self.ignore_whitespace=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)
  @@ignore_whitespace = bool
end

# @return [Boolean] whether whitespace is ignored
def self.ignore_whitespace?
  @@ignore_whitespace
end

# @param [Boolean] bool whether to qualify entries by basename only
# @raise [ArgumentError] unless given true or false
def self.basename_qualify=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)
  @@basename_qualify = bool
end

# @return [Boolean] whether entries are qualified by basename only
def self.basename_qualify?
  @@basename_qualify
end

# @param [Boolean] bool whether to add the residuals entry
# @raise [ArgumentError] unless given true or false
def self.residuals=(bool)
  raise ArgumentError, "Must be boolean" unless [true, false].include?(bool)
  @@residuals = bool
end

# @return [Boolean] whether the residuals entry is added
def self.residuals?
  @@residuals
end
80
+
81
+
82
+ ###########
83
+ # METHODS #
84
+ ###########
85
+
86
##
# Calculates the AST of the given file by running it through srcML.
#
# @param [String] path the path to the file
# @param [String, false] revision if given, the content is taken from that
#   git revision (`git show <revision>:<path>`) instead of from disk
#
# @return [Nokogiri::XML::Document] the srcML output parsed as XML, with
#   comments and/or whitespace removed when those options are switched on
# @raise [SrcML::UnsupportedLanguageError] if a revision is given and the
#   file extension is not listed in LANGUAGES
# @raise [ArgumentError] if the srcml command exits unsuccessfully
def self.ast(path, revision: false)
  if revision
    language = LANGUAGES[File.extname(path)]
    unless language
      raise SrcML::UnsupportedLanguageError, "Language in the file '#{path}' not supported (guessed language from file type)"
    end
    # bash is called explicitly to get support for process substitution.
    # The language and the <revision>:<path> spec are handed over as
    # positional parameters, so spaces or quotes in a file name are never
    # interpreted as shell syntax.
    script = "#{SRCML} --language \"$1\" <(git show \"$2\")"
    ast, e, s = Open3.capture3('bash', '-c', script, 'bash', language, "#{revision}:#{path}")
  else
    # argument-vector form: no shell is involved, the path is passed verbatim
    ast, e, s = Open3.capture3(SRCML, path)
  end
  raise ArgumentError, e unless s.success?

  # turn into structured xml
  xml = Nokogiri::XML(ast)
  if ignore_comments?
    xml.search('comment').each do |c|
      # The newline and indentation in front of a comment are treated as
      # part of the comment, so they are stripped from the preceding node.
      if (previous_node = c.previous_sibling)
        previous_node.content = previous_node.content.gsub(/\n(\s)*/,"")
      end
      c.remove
    end
  end
  if ignore_whitespace?
    xml.search("text()").each do |node|
      if node.content =~ /\S/
        # text with real content: squeeze every run of whitespace out of it
        node.content = node.content.gsub(/[[:space:]]([[:space:]])*/,"")
      else
        # whitespace-only text node
        node.remove
      end
    end
  end
  xml
end
139
+
140
##
# Returns the methods of the given file.
#
# Keys have the form "<file>:<method>" or "<file>:<class>:<method>"; if the
# method has parameters, their types are appended as "(type1,type2)".
# When the residuals option is on, an extra "<file>:@residuals" entry hashes
# whatever is left of the file once all functions are removed.
#
# @param [String] path the path to the file
# @param [String, false] revision if given, retrieves the file from that git revision
# @return [Hash{String => Integer}] qualified method names mapped to a hash of the method body
def self.methods(path, revision: false)
  tree = ast(path, revision: revision)
  found = {}
  qualified_file = basename_qualify? ? File.basename(path) : path

  # split file based on class declarations; without classes use the full ast
  partitions = tree.search("class")
  partitions = [tree] if partitions.empty?

  partitions.each do |partition|
    # Only a real class partition can carry a class name; the whole-document
    # fallback is not a partitioned file, so nothing is logged for it.
    class_name = ''
    unless partition.document?
      if (name = partition.at_css("/name"))
        class_name = name.text
      else
        logger.debug "(#{qualified_file}) Found partitioned file but could not find classname for this partition at location '/name'. Context:\n###\n#{partition}\n###"
      end
    end

    partition.search("function").each do |function|
      name = function.at_css("/name")
      unless name
        logger.debug "(#{qualified_file}) Could not identify function name at location '/name'. Context:\n###\n#{function}\n###"
        next
      end

      # attempt to extract parameter types, trying the most specific structure first
      parameters = []
      if (parameter_list = function.at_css("/parameter_list"))
        parameter_list.search("parameter").each do |p|
          parameter = p.at_css("decl type name name") || p.at_css("decl type name") || p.at_css("decl type") || p.at_css("type") || p.at_css("name")
          if parameter
            parameters << parameter.text
          else
            logger.debug "(#{qualified_file}) Function: #{name}. Nested structures: (decl type name name) or (decl type name) or (decl type) not found in parameter xml, ignoring this parameter. Context:\n###\n#{p}\n###"
          end
        end
      else
        logger.debug "(#{qualified_file}) Parameter list not found for Function: #{name}. Searched for structure '/parameter_list'. Context:\n###\n#{function}\n###"
      end

      method_name = parameters.empty? ? name.text : name.text+"("+parameters.join(',')+")"
      fully_qualified_name = class_name.empty? ? [qualified_file,method_name].join(':') : [qualified_file,class_name,method_name].join(':')

      # the method is represented by the hash of its body text
      if (block = function.at_css("block"))
        found[fully_qualified_name] = block.content.hash
      else
        logger.debug "(#{qualified_file}) No <block> (i.e. the function content) in the function xml. Function: #{name}."
      end
    end
  end

  if residuals?
    # residuals entry: what is left of the code when all methods are removed
    tree.search("function").remove
    found[qualified_file+':'+'@residuals'] = tree.content.hash
  end

  found
end
206
+
207
##
# Compares two Hashes and lists every key that is missing from one of them
# or that maps to different values in the two.
#
# Keys of +new+ that are added or modified come first (in the order of +new+),
# followed by the keys that only exist in +old+.
#
# @param [Hash] old
# @param [Hash] new
# @return [Array<String>]
def self.different_entries(old, new)
  unchanged = ->(key, value) { old.key?(key) && value == old[key] }
  added_or_modified = new.reject { |key, value| unchanged.call(key, value) }.keys
  only_in_old = old.keys - new.keys
  added_or_modified + only_in_old
end
235
+
236
+
237
##
# Lists the methods that differ between two files on disk.
#
# @param [String] old the path to the old file
# @param [String] new the path to the new file
# @return [Array<String>] the changed methods
def self.changed_methods(old, new)
  before, after = [old, new].map { |file| methods(file) }
  different_entries(before, after)
end
248
+
249
##
# Like #changed_methods, but both versions of the file are taken from git:
# the given revision is compared against its first parent (revision~1).
#
# @param [String] path the path to the file
# @param [String] revision the revision to retrieve the file from
# @return [Array<String>] the changed methods
def self.changed_methods_git(path, revision)
  current = methods(path, revision: revision)
  previous = methods(path, revision: revision+'~1')
  different_entries(previous, current)
end
261
+ end
262
+
data/lib/vcs2json/git.rb CHANGED
@@ -1,230 +1,242 @@
1
1
  require_relative '../vcs2json_helper'
2
2
 
3
3
  module Vcs2Json
4
- class Git
5
- # Generate separators between fields and commits
6
- FIELD_SEP = Digest::SHA256.hexdigest Time.new.to_s + "field_sep"
7
- COMMIT_SEP = Digest::SHA256.hexdigest Time.new.to_s + "commit_sep"
8
-
9
- def initialize(opts)
10
- @opts = opts
11
- self.ignore = @opts[:ignore]
12
- # Create a commit hash that defaults to creating new hashes given hash[:key]
13
- # so we can do 'commit[:commit][:author][:name] = .. ' without creating the :commit and :author hashes first
14
- @commits = Hash.new {|h,k| h[k] = Hash.new(&h.default_proc) }
15
- # place to stare empty commit ids if they are encountered
16
- @empty_commits = []
17
- # used to decide if we should try to search for more commits
18
- @oldest_commit_in_previous_search = Time.now
19
- end
20
-
21
- def execute
22
- # recursively add commits as long as we have less than :number and there are still more commits to search
23
- begin
24
- add_commits(@opts)
25
- add_integer_mapping
4
+ class Git
5
+ include Logging
6
+
7
+ attr_accessor :number, :fine_grained, :case_id
8
+ attr_reader :ignore
9
+
10
# Separator placed between the fields of the git pretty-format output;
# a digest is used so it will not show up inside a commit message.
FIELD_SEP = Digest::SHA256.hexdigest(Time.new.to_s + "field_sep")
# Pretty-format string: sha, author name/email/date,
# committer name/email/date and the raw message, joined by FIELD_SEP.
META_DATA = %w[%H %an %ae %ad %cn %ce %cd %B].join(FIELD_SEP)
20
+
21
# Copies the command line options into this instance and configures the
# Logging and SrcML modules accordingly.
#
# @param [Hash] opts the options as collected by the CLI
def initialize(opts)
  # ignore= looks up the entry for case_id in the .evocignore file,
  # so the id has to be in place before the ignore list is loaded
  self.case_id = opts[:case_id]
  self.ignore = opts[:ignore]
  self.before = opts[:before]
  self.after = opts[:after]
  self.number = opts[:number]
  self.fine_grained = opts[:fine_grained]

  # Set logger location and level
  Logging.set_location(opts[:logger_location])
  Logging.set_level(opts[:logger_level])

  # Settings for the fine grained (method level) analysis
  SrcML.ignore_comments = opts[:ignore_comments]
  SrcML.ignore_whitespace = opts[:ignore_whitespace]
  SrcML.residuals = opts[:residuals]
end
26
35
 
27
- # sort on date and prune excessive commits
28
- sorted_and_pruned = @commits.sort_by {|id,commit| commit[:date]}.reverse.map {|(_,commit)| commit}.first(@opts[:number])
36
# Stores the lower date bound. The value is kept as given when Date can
# parse it; otherwise a warning is printed and the bound is cleared.
# A nil argument leaves the current value untouched.
def after=(after)
  return if after.nil?
  Date.parse(after)
  @after = after
rescue
  STDERR.puts "Invalid date --after=#{after}. Ignoring option."
  @after = nil
end
29
47
 
30
- # print commits to stdout as json
31
- $stdout.puts JSON.pretty_generate(sorted_and_pruned)
48
# The lower date bound as a git command line option,
# or an empty string when no bound is set.
def after
  return '' if @after.nil?
  "--after=\"#{@after}\""
end
32
51
 
33
- # print ids of empty commits to stderr
34
- if !@empty_commits.empty?
35
- STDERR.puts "EMPTY COMMITS"
36
- STDERR.puts @empty_commits
37
- end
38
- # print additional info to stderr
39
- STDERR.puts "\n\nExtracted #{sorted_and_pruned.size} commits."
40
- rescue EncodingError => e
41
- puts e
42
- end
52
# Stores the upper date bound. The value is kept as given when Date can
# parse it; otherwise a warning is printed and the bound is cleared.
# A nil argument leaves the current value untouched.
def before=(before)
  return if before.nil?
  Date.parse(before)
  @before = before
rescue
  STDERR.puts "Invalid date --before=#{before}. Ignoring option."
  @before = nil
end
44
63
 
45
- def ignore
46
- @ignore
47
- end
64
# The upper date bound as a git command line option,
# or an empty string when no bound is set.
def before
  return '' if @before.nil?
  "--before=\"#{@before}\""
end
48
67
 
49
- def ignore= path
50
- default_locations = ["#{Dir.pwd}/.evocignore","~/.evocignore"]
51
- paths = (path.nil? ? default_locations : [path] + default_locations)
52
- file = nil
53
- ignore = []
54
- paths.each do |p|
55
- if File.exist?(p)
56
- file = File.open(p)
57
- STDERR.puts "Loading files to ignore from #{file.path}"
58
- # return first match
59
- break
60
- end
61
- end
62
- if file.nil?
63
- STDERR.puts ".evocignore not found. Tried #{paths}. All files will be used."
64
- else
65
- if @opts[:case_id].nil?
66
- STDERR.puts "Id in .evocignore not specified, not ignoring any files."
67
- else
68
- ignore_file = YAML.load(file)
69
- if ignore_file.key?(@opts[:case_id])
70
- ignore = ignore_file[@opts[:case_id]]
71
- if !ignore.nil?
72
- STDERR.puts "Ignoring #{ignore.size} files"
73
- end
74
- else
75
- STDERR.puts "The id: '#{@opts[:case_id]}' not found in #{file.path}"
76
- end
77
- end
78
- end
79
- @ignore = (ignore.nil? ? [] : ignore)
80
- return @ignore
81
- end
82
- private
83
-
84
- def add_commits(opts)
85
- add_meta_information(opts)
86
- add_change_information(opts)
87
-
88
- if @commits.size < @opts[:number]
89
- oldest_commit_in_this_search = get_oldest_commit
90
- if oldest_commit_in_this_search != @oldest_commit_in_previous_search
91
- # we found new commits in this search but still need more
92
- @oldest_commit_in_previous_search = oldest_commit_in_this_search
93
- add_commits(before: oldest_commit_in_this_search, number: (@opts[:number] - @commits.size)*2)
94
- else
95
- STDERR.puts "\nAsked for #{@opts[:number]} commits, only found #{@commits.size} non-empty commits. Searched all the way back to #{oldest_commit_in_this_search}."
96
- end
68
##
# Walks the commit history (newest first, merges excluded) and prints one
# JSON object per line to $stdout for every commit in which at least one
# change was detected, until the requested number of commits is reached.
#
# Progress, a note when fewer commits than requested were found, and the
# ids of empty commits are written to STDERR.
#
# @raise [EncodingError] if the commit meta data cannot be encoded as UTF-8
def parse
  # keeps track of number of commits successfully parsed
  commit_counter = 0
  # keeps track of empty commits
  empty_commits = []

  # Getting the list of revision ids is cheap, so ten times the requested
  # amount is fetched in case some of the first 'n' commits turn out empty.
  commit_ids = `git rev-list HEAD #{self.before} #{self.after} -n #{self.number*10} --no-merges`.split

  commit_ids.each do |id|
    logger.debug "Parsing commit: #{id}"

    # one line per changed file: "<status>\t<path>"
    changed_files = `git log --pretty=format:'' --name-status #{id} -n 1`.split("\n")
    # Drop blank lines and ignored files. The ignore list holds paths, so it
    # is the path column that is compared, not the whole status line.
    changed_files.reject! { |line| line.empty? || self.ignore.include?(line.split("\t")[1]) }

    if changed_files.empty? # no files in commit
      empty_commits << id
      next
    end

    # fetch the meta data of the commit
    raw_commit = `git log --pretty=format:'#{META_DATA}' #{id} -n 1`
    commit = ''

    # clean the raw data
    begin
      # try encoding to utf8
      commit = raw_commit.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
      # need to explicitly check if the encoding is valid for ruby <= 2.0
      # utf8 -> utf8 will not do anything even with invalid bytes
      # http://stackoverflow.com/questions/24036821/ruby-2-0-0-stringmatch-argumenterror-invalid-byte-sequence-in-utf-8
      if !commit.valid_encoding?
        # encode to utf16 first and then back to utf8
        commit.encode!("UTF-16be", invalid: :replace, undef: :replace, :replace=>'')
        commit.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
      end
    rescue ArgumentError
      raise EncodingError, "Unable to encode input as UTF-8"
    end

    # construct the output hash; field order follows META_DATA
    fields = commit.split(FIELD_SEP)
    output_hash = {
      sha: fields[0].delete("\n"), # remove stray newlines
      name: fields[1],
      email: fields[2],
      date: Time.parse(fields[3]),
      committer_name: fields[4],
      committer_email: fields[5],
      committer_date: Time.parse(fields[6]),
      message: fields[7],
      changes: []
    }

    changed_files.each_with_index do |line, index|
      # print progress
      STDERR.print "Parsing file #{index+1} of #{changed_files.size} in commit #{commit_counter+1} of #{self.number}     \r"
      status, file_name = line.split("\t")
      if self.fine_grained
        begin
          if status == 'A'
            # new file, all methods are new, no need to calculate diff
            output_hash[:changes].concat(SrcML.methods(file_name, revision: id).keys)
          else
            output_hash[:changes].concat(SrcML.changed_methods_git(file_name, id))
          end
        rescue SrcML::UnsupportedLanguageError
          # no method level information available, fall back to the file itself
          output_hash[:changes] << file_name
        end
      else
        output_hash[:changes] << file_name
      end
    end

    # Only add commits where at least one change was detected
    if output_hash[:changes].empty?
      empty_commits << id
      next
    end

    # print commit to $stdout
    $stdout.puts output_hash.to_json
    commit_counter += 1

    # stop as soon as the requested amount is reached
    break if commit_counter == self.number
  end

  # we may still lack commits after exhaustive search, notify user
  if commit_counter < self.number
    STDERR.puts "Asked for #{self.number} commits, only found #{commit_counter} non-empty commits in the last #{commit_ids.size} commits"
  end
  # print ids of empty commits to stderr
  if !empty_commits.empty?
    STDERR.puts "EMPTY COMMITS"
    STDERR.puts empty_commits
  end
end
207
207
 
208
- # simply un-abbreviates the status code given by --name-status
209
- def parse_status(abbreviated_status)
210
- case abbreviated_status
211
- when "A"
212
- "added"
213
- when "M"
214
- "modified"
215
- when "D"
216
- "deleted"
217
- end
208
##
# Loads the list of files to ignore for the current case id.
#
# The ignore file is looked up at +path+ (if given), then at ./.evocignore
# and finally at ~/.evocignore; the first one that exists is used. The file
# is expected to be a YAML mapping from case id to a list of paths.
#
# @param [String, nil] path location of the ignore file, nil to use the defaults
# @return [Array] the files to ignore, empty when nothing could be loaded
def ignore= path
  # '~' is not understood by File.exist?, so the home location is expanded
  # up front; expansion fails when no home directory can be determined
  home_location = begin
    File.expand_path("~/.evocignore")
  rescue ArgumentError
    nil
  end
  default_locations = ["#{Dir.pwd}/.evocignore", home_location].compact
  paths = (path.nil? ? default_locations : [path] + default_locations)

  # first match wins
  location = paths.find { |p| File.exist?(p) }
  ignore = []
  if location.nil?
    STDERR.puts ".evocignore not found. Tried #{paths}. All files will be used."
  else
    STDERR.puts "Loading files to ignore from #{location}"
    if self.case_id.nil?
      STDERR.puts "Id in .evocignore not specified, not ignoring any files."
    else
      # File.read closes the file again, no handle is left open.
      # NOTE(review): YAML.load is kept for compatibility with existing
      # ignore files; only point it at files you trust.
      ignore_file = YAML.load(File.read(location))
      # an empty or non-mapping file simply contains no ids
      ignore_file = {} unless ignore_file.respond_to?(:key?)
      if ignore_file.key?(self.case_id)
        ignore = ignore_file[self.case_id]
        if !ignore.nil?
          STDERR.puts "Ignoring #{ignore.size} files"
        end
      else
        STDERR.puts "The id: '#{self.case_id}' not found in #{location}"
      end
    end
  end
  @ignore = (ignore.nil? ? [] : ignore)
  return @ignore
end
241
+ end
230
242
  end
@@ -0,0 +1,43 @@
1
# Enable logging in a class through 'include Logging' (or in a module
# through 'extend Logging'); both give access to a #logger method.
#
# One Logger is created per name and cached. Location and level are shared
# module-wide settings: set_level also updates loggers that already exist,
# set_location only affects loggers created afterwards.
module Logging
  # The severity names accepted by set_level
  LEVELS = %w[debug info warn error fatal].freeze

  # @return [Logger] the logger for the receiver, named after its class
  #   (or after the module itself when Logging was used with 'extend')
  def logger
    @logger ||= Logging.logger_for(is_a?(Module) ? name : self.class.name)
  end

  # Use a hash class-ivar to cache a unique Logger per class:
  @loggers = {}
  @logger_level = 'debug'
  @logger_location = 'vcs2json.log'

  class << self
    # @param [String] classname the name the logger is registered under
    # @return [Logger] the cached logger for classname, created on first use
    def logger_for(classname)
      @loggers[classname] ||= configure_logger_for(classname)
    end

    # Builds a daily-rotated Logger using the current location and level.
    #
    # @param [String] classname used as the logger's progname
    # @return [Logger]
    def configure_logger_for(classname)
      logger = Logger.new(@logger_location,'daily')
      logger.progname = classname
      logger.level = severity_for(@logger_level)
      logger
    end

    # @param [String, nil] path where loggers created from now on write to
    def set_location(path)
      @logger_location = path
    end

    # Sets the level for all existing and future loggers.
    # nil and the empty string are ignored; an unknown level is reported on
    # STDERR and leaves the current level in place.
    #
    # @param [String, nil] level one of LEVELS
    def set_level(level)
      return if level.nil? || level.empty?

      unless LEVELS.include?(level)
        STDERR.puts "Unable to set logger level to #{level}, possible values are #{LEVELS}. Keeping '#{@logger_level}'."
        return
      end

      STDERR.puts "Logging level has been set to '#{level}' for output to #{@logger_location}"
      # @loggers maps names to loggers, so only the values are touched
      @loggers.each_value { |cached| cached.level = severity_for(level) }
      @logger_level = level
    end

    private

    # Maps a level name such as 'warn' to the matching Logger constant.
    def severity_for(level)
      Logger.const_get(level.upcase)
    end
  end
end
@@ -1,3 +1,3 @@
1
1
# Namespace of the gem; holds the released version number.
module Vcs2Json
  VERSION = '2.0.0'
end
@@ -4,5 +4,8 @@ require 'json/pure'
4
4
  require 'time'
5
5
  require 'csv'
6
6
  require 'chronic'
7
+ require 'logger' # leveled logging
8
+ require 'nokogiri' # better/faster xml library
9
+ require 'open3' # make system calls and capture stdout/stderr/exitcodes easily
7
10
  require 'require_all'
8
11
  require_rel '/**/*.rb'
data/vcs2json.gemspec CHANGED
@@ -25,4 +25,5 @@ Gem::Specification.new do |spec|
25
25
  spec.add_runtime_dependency "require_all"
26
26
  spec.add_runtime_dependency "json_pure"
27
27
  spec.add_runtime_dependency "chronic"
28
+ spec.add_runtime_dependency "nokogiri"
28
29
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vcs2json
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Rolfsnes
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-06-10 00:00:00.000000000 Z
11
+ date: 2016-09-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: nokogiri
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
111
125
  description:
112
126
  email:
113
127
  - mail@thomasrolfsnes.com
@@ -136,7 +150,10 @@ files:
136
150
  - lib/exceptions/no_date_field.rb
137
151
  - lib/exceptions/no_file_section.rb
138
152
  - lib/exceptions/no_time_data_in_chafiles_field.rb
153
+ - lib/exceptions/unsupported_language.rb
154
+ - lib/srcML/srcml.rb
139
155
  - lib/vcs2json/git.rb
156
+ - lib/vcs2json/logger.rb
140
157
  - lib/vcs2json/version.rb
141
158
  - lib/vcs2json_helper.rb
142
159
  - vcs2json.gemspec