vcs2json 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/cli/main.rb +28 -9
- data/lib/exceptions/unsupported_language.rb +4 -0
- data/lib/srcML/srcml.rb +262 -0
- data/lib/vcs2json/git.rb +217 -205
- data/lib/vcs2json/logger.rb +43 -0
- data/lib/vcs2json/version.rb +1 -1
- data/lib/vcs2json_helper.rb +3 -0
- data/vcs2json.gemspec +1 -0
- metadata +19 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3dfd8c30d50ef1760dd38b36725e30be0b09d9f
|
4
|
+
data.tar.gz: 842566ac9e598784693c76cc60c8a3531ca8b218
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74e7b098604e6437ef3df2a51af00e013c84c790b47502352d2c1bb8c251892c71d22fb65035a6dbd1217e4d27499dd8abd5d2c50f4668d0fe0dfed407ff4725
|
7
|
+
data.tar.gz: d53aa615168906592464caa112cf55ac0ff8f36a0a7274c3b98317e2c898588142034564f683d0f7a0670b53c78aa947eecbe5d7ea44d8a820702a758c3204de
|
data/.gitignore
CHANGED
data/lib/cli/main.rb
CHANGED
@@ -4,20 +4,39 @@ module Vcs2JsonCLI
|
|
4
4
|
class Main < Thor
  map %w[--version -v] => :__print_version

  ##
  # Thor reports failures with exit status 0 unless told otherwise;
  # answering true here makes a failed command exit with status 1.
  def self.exit_on_failure?
    true
  end

  # Logging options
  class_option :logger_level,
               type: :string,
               default: 'error',
               desc: "The severity level to output to log"
  class_option :logger_location,
               type: :string,
               desc: "Which file to print logs to"

  # Options shared by every command (declaration order is kept, since it
  # is the order in which they were originally registered)
  class_option :ignore,
               type: :string,
               desc: "Specify location of .evocignore file"
  class_option :case_id,
               type: :string,
               desc: "Specify case identifier. Used by .evocignore etc"
  class_option :issue,
               aliases: '-i',
               type: :boolean,
               default: false,
               desc: "Attempt to extract issue ids from commit messages"
  class_option :after,
               aliases: '-a',
               desc: "Only include commits after this date"
  class_option :before,
               aliases: '-b',
               desc: "Only include commits before this date"
  class_option :number,
               aliases: '-n',
               type: :numeric,
               default: 10000,
               desc: "The number of commits to dump"
  class_option :fine_grained,
               type: :boolean,
               default: true,
               desc: "Include fine grained change information in output"
  class_option :ignore_comments,
               type: :boolean,
               default: false,
               desc: "Ignore comments when calculating diffs. Only in effect for fine grained changes."
  class_option :ignore_whitespace,
               type: :boolean,
               default: false,
               desc: "Ignore whitespace when calculating diffs. Only in effect for fine grained changes."
  class_option :residuals,
               type: :boolean,
               default: true,
               desc: "Consider changes that happen outside of methods"

  desc "--version, -v", "print the version"
  # Prints the gem version to stdout.
  def __print_version
    puts Vcs2Json::VERSION
  end

  desc "git [options]","Make a dump of the change-history of system using git, output on stdout"
  # Builds a Vcs2Json::Git from the parsed command line options and runs it.
  def git
    Vcs2Json::Git.new(options).parse
  end

  # git is the command that runs when none is named
  default_task :git
end
|
23
42
|
end
|
data/lib/srcML/srcml.rb
ADDED
@@ -0,0 +1,262 @@
|
|
1
|
+
# Wrapper for the srcML commandline interface,
|
2
|
+
# with functions specifically directed at extracting method names and calculating diffs between files.
|
3
|
+
|
4
|
+
module SrcML
  extend Logging

  # Hash of supported languages.
  #
  # The file extension must map to a known format for srcML
  LANGUAGES = {'.java' => 'Java',
               '.C'    => 'C++',
               '.cc'   => 'C++',
               '.cpp'  => 'C++',
               '.CPP'  => 'C++',
               '.c++'  => 'C++',
               '.cp'   => 'C++',
               '.c'    => 'C'}

  # Check that SrcML is available.
  # The version banner is discarded: stdout is where the JSON dump is written,
  # so letting `srcml --version` print there would corrupt the output.
  if system("srcml", '--version', out: File::NULL)
    SRCML = "srcml"
  else
    $stderr.puts "SrcML is required, please install from www.srcml.com"
    # a missing dependency is a failure, so do not exit with status 0
    exit 1
  end

  ##
  # PUBLIC INTERFACE
  ##

  ###########
  # OPTIONS #
  ###########

  # Whether to remove comments from the source
  @@ignore_comments = false
  # Whether to remove whitespace from the source
  @@ignore_whitespace = false
  # Whether to qualify files with their full path or just their basename
  # i.e., /lib/file.a or just file.a
  @@basename_qualify = false
  # Whether to consider changes that happen outside of methods
  @@residuals = false

  # @param [Boolean] bool whether comments are stripped before hashing
  # @raise [ArgumentError] unless bool is exactly true or false
  def self.ignore_comments= bool
    validate_boolean!(bool)
    @@ignore_comments = bool
  end

  def self.ignore_comments?
    @@ignore_comments
  end

  # @param [Boolean] bool whether whitespace is stripped before hashing
  # @raise [ArgumentError] unless bool is exactly true or false
  def self.ignore_whitespace= bool
    validate_boolean!(bool)
    @@ignore_whitespace = bool
  end

  def self.ignore_whitespace?
    @@ignore_whitespace
  end

  # @param [Boolean] bool whether files are qualified by basename only
  # @raise [ArgumentError] unless bool is exactly true or false
  def self.basename_qualify= bool
    validate_boolean!(bool)
    @@basename_qualify = bool
  end

  def self.basename_qualify?
    @@basename_qualify
  end

  # @param [Boolean] bool whether a '@residuals' entry is produced per file
  # @raise [ArgumentError] unless bool is exactly true or false
  def self.residuals= bool
    validate_boolean!(bool)
    @@residuals = bool
  end

  def self.residuals?
    @@residuals
  end


  ###########
  # METHODS #
  ###########

  ##
  # Calculates the AST of the given file
  #
  # Commands are run with argument arrays rather than interpolated shell
  # strings, so paths containing spaces or quotes are passed through intact.
  #
  # @param [String] path the path to the file
  # @param [String] revision if specified, retrieves the file from the given revision
  #
  # @return [Nokogiri::XML::Document] an AST representation
  # @raise [SrcML::UnsupportedLanguageError] when a revision is given and the
  #   file extension is not in LANGUAGES
  # @raise [ArgumentError] when the srcml command exits unsuccessfully
  def self.ast(path,revision: false)
    source_xml =
      if revision
        language = LANGUAGES[File.extname(path)]
        if language.nil?
          raise SrcML::UnsupportedLanguageError, "Language in the file '#{path}' not supported (guessed language from file type)"
        end
        # explicitly call bash to get support for process substitution;
        # values are handed over as positional parameters ($1..$3) instead of
        # being spliced into the script text
        run_srcml('bash', '-c', '"$1" --language "$2" <(git show "$3")',
                  'bash', SRCML, language, "#{revision}:#{path}")
      else
        run_srcml(SRCML, path)
      end

    # turn into structured xml
    xml = Nokogiri::XML(source_xml)
    strip_comments!(xml) if ignore_comments?
    strip_whitespace!(xml) if ignore_whitespace?
    xml
  end

  ##
  # Returns the methods of the given file
  #
  # If the method has any parameters, the parameter types are also returned with the method name
  #
  # NOTE: this singleton method shadows Ruby's built-in Module#methods
  # introspection for SrcML; the name is kept because callers depend on it.
  #
  # @param [String] path the path to the file
  # @param [String] revision if specified, retrieves the file from the given revision
  # @return [Hash[method_name => method_hash]] a hash storing the methods as keys and the hashed method as values
  def self.methods(path,revision: false)
    tree = ast(path,revision: revision)
    # hash each method and store in hash map with function name as key
    result = {}
    qualified_file = basename_qualify? ? File.basename(path) : path

    # split file based on class declarations
    partitions = tree.search("class")
    # no classes, just use the full ast
    partitions = [tree] if partitions.empty?

    partitions.each do |partition|
      # if partitioned into classes, attempt to extract class name
      class_name = ''
      unless partition.document?
        if class_node = partition.at_css("/name")
          class_name = class_node.text
        else
          logger.debug "(#{qualified_file}) Found partitioned file but could not find classname for this partition at location '/name'. Context:\n###\n#{partition}\n###"
        end
      end

      partition.search("function").each do |function|
        name = function.at_css("/name")
        if name.nil?
          # log the function node itself (the original interpolated Kernel#p, i.e. nothing)
          logger.debug "(#{qualified_file}) Could not identify function name at location '/name'. Context:\n###\n#{function}\n###"
          next
        end

        # attempt to extract parameters
        parameters = []
        if parameter_list = function.at_css("/parameter_list")
          parameter_list.search("parameter").each do |param|
            type_node = param.at_css("decl type name name") ||
                        param.at_css("decl type name") ||
                        param.at_css("decl type") ||
                        param.at_css("type") ||
                        param.at_css("name")
            if type_node
              parameters << type_node.text
            else
              logger.debug "(#{qualified_file}) Function: #{name}. Nested structures: (decl type name name) or (decl type name) or (decl type) not found in parameter xml, ignoring this parameter. Context:\n###\n#{param}\n###"
            end
          end
        else
          logger.debug "(#{qualified_file}) Parameter list not found for Function: #{name}. Searched for structure '/parameter_list'. Context:\n###\n#{function}\n###"
        end

        method_name = parameters.empty? ? name.text : "#{name.text}(#{parameters.join(',')})"
        name_parts = class_name.empty? ? [qualified_file,method_name] : [qualified_file,class_name,method_name]
        fully_qualified_name = name_parts.join(':')

        if block = function.at_css("block")
          result[fully_qualified_name] = block.content.hash
        else
          logger.debug "(#{qualified_file}) No <block> (i.e. the function content) in the function xml. Function: #{name}."
        end
      end
    end

    if residuals?
      # add residuals entry
      # i.e., whats left of the code when all methods are removed
      tree.search("function").remove
      result[qualified_file+':'+'@residuals'] = tree.content.hash
    end

    result
  end

  ##
  # Given two Hashes, returns all the keys that either have different values in the two hashes
  # or are not in both hashes.
  #
  # Keys from +new+ come first (in +new+'s order), followed by the keys that
  # only exist in +old+.
  #
  # @param: [Hash] old
  # @param: [Hash] new
  # @return [Array<String>]
  def self.different_entries(old,new)
    # new keys, or different values for same key
    added_or_changed = new.reject {|k,v| old.key?(k) && old[k] == v}.keys
    # keys that are only in old
    deleted_keys = old.keys - new.keys
    added_or_changed + deleted_keys
  end


  ##
  # Calculate the changed methods between two files on disk
  #
  # @param [String] old the path to the old file
  # @param [String] new the path to the new file
  # @return [Array<String>] the changed methods
  def self.changed_methods(old,new)
    different_entries(methods(old),methods(new))
  end

  ##
  # Like #changed_methods but retrieves the file from a git revision
  # and compares it against the same path in the parent revision (revision~1)
  #
  # @param [String] path the path to the file
  # @param [String] revision the revision to retrieve the file from
  # @return [Array<String>] the changed methods
  def self.changed_methods_git(path,revision)
    methods_new = methods(path, revision: revision)
    methods_old = methods(path, revision: "#{revision}~1")
    different_entries(methods_old,methods_new)
  end

  ##
  # PRIVATE HELPERS
  ##

  # Raises unless the given value is exactly true or false.
  def self.validate_boolean!(value)
    raise ArgumentError, "Must be boolean" unless [true, false].include?(value)
  end

  # Runs the given command (argument array, no shell interpolation) and
  # returns its stdout; raises ArgumentError carrying stderr on failure.
  def self.run_srcml(*command)
    out,err,status = Open3.capture3(*command)
    raise ArgumentError, err unless status.success?
    out
  end

  # Removes every <comment> node from the document, in place.
  def self.strip_comments!(xml)
    xml.search('comment').each do |c|
      # trailing newline + any number of spaces are removed from the previous node
      # this gives a more intuitive behaviour
      # i.e., the newline + spaces before the comment is considered "part of" the comment
      previous_node = c.previous_sibling
      # only rewrite plain text nodes: assigning content to an element would
      # replace all of its children with a single text node
      if previous_node && previous_node.text?
        previous_node.content = previous_node.content.gsub(/\n(\s)*/,"")
      end
      # now remove the comment
      c.remove
    end
  end

  # Removes whitespace from every text node, dropping nodes that are
  # whitespace only, in place.
  def self.strip_whitespace!(xml)
    xml.search("text()").each do |node|
      if node.content =~ /\S/
        node.content = node.content.gsub(/[[:space:]]+/,"")
      else
        node.remove
      end
    end
  end

  private_class_method :validate_boolean!, :run_srcml, :strip_comments!, :strip_whitespace!
end
|
262
|
+
|
data/lib/vcs2json/git.rb
CHANGED
@@ -1,230 +1,242 @@
|
|
1
1
|
require_relative '../vcs2json_helper'
|
2
2
|
|
3
3
|
module Vcs2Json
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
4
|
+
# Dumps the change history of the git repository in the current working
# directory as one JSON object per commit on stdout. Progress and
# diagnostics go to stderr.
class Git
  include Logging

  attr_accessor :number, :fine_grained, :case_id
  attr_reader :ignore

  # Generate separators between fields and commits
  FIELD_SEP = Digest::SHA256.hexdigest Time.new.to_s + "field_sep"
  META_DATA = "%H#{FIELD_SEP}"\
              "%an#{FIELD_SEP}"\
              "%ae#{FIELD_SEP}"\
              "%ad#{FIELD_SEP}"\
              "%cn#{FIELD_SEP}"\
              "%ce#{FIELD_SEP}"\
              "%cd#{FIELD_SEP}"\
              "%B"

  # getting the list of revision ids is cheap, so this many times the
  # requested number of commits is fetched as candidates
  CANDIDATE_FACTOR = 10

  # @param [Hash] opts the command line options; reads :case_id, :ignore,
  #   :before, :after, :number, :fine_grained, :logger_location,
  #   :logger_level, :ignore_comments, :ignore_whitespace and :residuals
  def initialize(opts)
    # the case id selects the section of the .evocignore file, so it must
    # be known before the ignore list is loaded
    self.case_id = opts[:case_id]
    self.ignore = opts[:ignore]
    self.before = opts[:before]
    self.after = opts[:after]
    self.number = opts[:number]
    self.fine_grained = opts[:fine_grained]

    # Set logger level
    Logging.set_location(opts[:logger_location])
    Logging.set_level(opts[:logger_level])
    SrcML.ignore_comments = opts[:ignore_comments]
    SrcML.ignore_whitespace = opts[:ignore_whitespace]
    SrcML.residuals = opts[:residuals]
  end

  # Stores the lower date bound; an unparsable date is reported on stderr
  # and ignored, nil leaves the current value untouched.
  def after=(after)
    @after = validated_date(after, 'after') unless after.nil?
  end

  # @return [String] the bound as a git command line option, '' when unset
  def after
    @after.nil? ? '' : "--after=\"#{@after}\""
  end

  # Stores the upper date bound; an unparsable date is reported on stderr
  # and ignored, nil leaves the current value untouched.
  def before=(before)
    @before = validated_date(before, 'before') unless before.nil?
  end

  # @return [String] the bound as a git command line option, '' when unset
  def before
    @before.nil? ? '' : "--before=\"#{@before}\""
  end

  # Walks the non-merge commits reachable from HEAD and prints every commit
  # with at least one detected change as a JSON line on stdout, until
  # `number` commits have been printed or the candidates run out.
  def parse
    # keeps track of number of commits successfully parsed
    commit_counter = 0
    # keeps track of empty commits
    empty_commits = []

    commit_ids = candidate_commit_ids

    commit_ids.each do |id|
      logger.debug "Parsing commit: #{id}"

      changed_files = changed_files_in(id)
      if changed_files.empty? # no files in commit
        empty_commits << id
        next
      end

      output_hash = metadata_for(id)
      output_hash[:changes] = changes_in(id, changed_files, commit_counter)

      # Only add commits where at least one change was detected
      if output_hash[:changes].empty?
        empty_commits << id
        next
      end

      $stdout.puts output_hash.to_json
      # increase counter for number of commits successfully parsed
      commit_counter += 1
      break if commit_counter == self.number
    end

    # we may still lack commits after exhaustive search, notify user
    if commit_counter < self.number
      STDERR.puts "Asked for #{self.number} commits, only found #{commit_counter} non-empty commits in the last #{commit_ids.size} commits"
    end
    # print ids of empty commits to stderr
    if !empty_commits.empty?
      STDERR.puts "EMPTY COMMITS"
      STDERR.puts empty_commits
    end
  end

  # Loads the list of files to ignore for the current case id.
  #
  # The first existing file among the given path, ./.evocignore and
  # ~/.evocignore is used. The file is read as YAML and the entry stored
  # under `case_id` becomes the ignore list.
  #
  # @param [String, nil] path explicit location of the ignore file
  # @return [Array] the loaded ignore list (empty when nothing applies)
  def ignore= path
    # '~' is not expanded by File.exist?, so resolve it explicitly
    default_locations = ["#{Dir.pwd}/.evocignore", File.expand_path("~/.evocignore")]
    paths = (path.nil? ? default_locations : [path] + default_locations)
    @ignore = []

    # use first match
    location = paths.find {|p| File.exist?(p)}
    if location.nil?
      STDERR.puts ".evocignore not found. Tried #{paths}. All files will be used."
      return @ignore
    end

    STDERR.puts "Loading files to ignore from #{location}"
    if self.case_id.nil?
      STDERR.puts "Id in .evocignore not specified, not ignoring any files."
      return @ignore
    end

    # load_file closes the file again; an empty file does not yield a Hash
    ignore_file = YAML.load_file(location)
    if ignore_file.is_a?(Hash) && ignore_file.key?(self.case_id)
      entries = ignore_file[self.case_id]
      if !entries.nil?
        @ignore = Array(entries)
        STDERR.puts "Ignoring #{@ignore.size} files"
      end
    else
      STDERR.puts "The id: '#{self.case_id}' not found in #{location}"
    end
    @ignore
  end

  private

  # Runs git with the given arguments (no shell involved, so option values
  # cannot be interpreted as shell syntax) and returns its stdout.
  def git(*args)
    IO.popen(['git', *args], &:read)
  end

  # Returns the value when Date.parse accepts it; otherwise reports the
  # invalid option on stderr and returns nil.
  def validated_date(value, option)
    Date.parse(value)
    value
  rescue ArgumentError, TypeError
    STDERR.puts "Invalid date --#{option}=#{value}. Ignoring option."
    nil
  end

  # Ids of the non-merge commits to examine, honouring the date bounds.
  # More than `number` are fetched in case some commits turn out empty.
  def candidate_commit_ids
    args = ['rev-list', 'HEAD']
    args << "--before=#{@before}" unless @before.nil?
    args << "--after=#{@after}" unless @after.nil?
    args.push('-n', (self.number*CANDIDATE_FACTOR).to_s, '--no-merges')
    git(*args).split
  end

  # The files touched by the commit as [status, file_name] pairs, with
  # blank lines and ignored files removed. The ignore list holds file
  # names, so it is matched against the name, not the whole status line.
  def changed_files_in(id)
    git('log', '--pretty=format:', '--name-status', id, '-n', '1')
      .split("\n")
      .reject(&:empty?)
      .map {|line| line.split("\t")}
      .reject {|_status,file_name| self.ignore.include?(file_name)}
  end

  # Fetches the commit metadata and returns it as the output hash
  # (everything except :changes).
  def metadata_for(id)
    raw_commit = git('log', "--pretty=format:#{META_DATA}", id, '-n', '1')
    fields = to_utf8(raw_commit).split(FIELD_SEP)

    output_hash = Hash.new
    output_hash[:sha] = fields[0].delete("\n") #remove astray newlines
    output_hash[:name] = fields[1]
    output_hash[:email] = fields[2]
    output_hash[:date] = Time.parse fields[3]
    output_hash[:committer_name] = fields[4]
    output_hash[:committer_email]= fields[5]
    output_hash[:committer_date] = Time.parse fields[6]
    output_hash[:message] = fields[7]
    output_hash
  end

  # Re-encodes raw git output as valid UTF-8.
  # NOTE(review): transcoding from 'binary' with replace: '' appears to drop
  # every non-ASCII byte; kept as-is because consumers may rely on it.
  #
  # @raise [EncodingError] when the input cannot be encoded
  def to_utf8(raw)
    # try encoding to utf8
    text = raw.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
    # need to explicitly check if the encoding is valid for ruby <= 2.0
    # utf8 -> utf8 will not do anything even with invalid bytes
    # http://stackoverflow.com/questions/24036821/ruby-2-0-0-stringmatch-argumenterror-invalid-byte-sequence-in-utf-8
    if !text.valid_encoding?
      # encode to utf16 first and then back to utf8
      text.encode!("UTF-16be", invalid: :replace, undef: :replace, :replace=>'')
      text.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
    end
    text
  rescue ArgumentError
    raise EncodingError, "Unable to encode input as UTF-8"
  end

  # Collects the change entries of one commit, printing progress to stderr.
  def changes_in(id, changed_files, commit_counter)
    changes = []
    changed_files.each_with_index do |(status,file_name),index|
      STDERR.print "Parsing file #{index+1} of #{changed_files.size} in commit #{commit_counter+1} of #{self.number} \r"
      if self.fine_grained
        changes.concat(fine_grained_changes(id, status, file_name))
      else
        changes << file_name
      end
    end
    changes
  end

  # Method level changes of one file; falls back to the file name itself
  # when the language of the file is not supported.
  def fine_grained_changes(id, status, file_name)
    if status == 'A'
      # new file, all methods are new, no need to calculate diff
      SrcML.methods(file_name,revision: id).keys
    else
      # calculate diffs
      SrcML.changed_methods_git(file_name,id)
    end
  rescue SrcML::UnsupportedLanguageError
    [file_name]
  end
end
|
230
242
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# enable logging in classes through 'include Logging'
|
2
|
+
# Mixin that gives a class (via `include`) or a module (via `extend`) a
# `logger` method. Loggers are created lazily, one per name, and share the
# location and level configured through Logging.set_location / set_level.
module Logging
  # Level names accepted by set_level; each maps to the Logger constant of
  # the same (upper-cased) name.
  LEVELS = %w(debug info warn error fatal unknown).freeze

  # @return [Logger] the logger for the receiver. Named after the module
  #   itself when the mixin was extended, after the class of the instance
  #   when it was included.
  def logger
    @logger ||= Logging.logger_for(is_a?(Module) ? name : self.class.name)
  end

  # Use a hash class-ivar to cache a unique Logger per class:
  @loggers = {}
  @logger_level = 'debug'
  @logger_location = 'vcs2json.log'

  class << self
    # @param [String] classname the name to look up
    # @return [Logger] the cached logger for the name, created on first use
    def logger_for(classname)
      @loggers[classname] ||= configure_logger_for(classname)
    end

    # Builds a daily rotated logger writing to the current location at the
    # current level.
    def configure_logger_for(classname)
      logger = Logger.new(@logger_location,'daily')
      logger.progname = classname
      logger.level = severity_for(@logger_level)
      logger
    end

    # Sets the file used by loggers created from now on. Loggers that are
    # already cached keep their device. A nil or empty path is ignored so
    # that an absent command line option does not wipe out the default.
    #
    # @param [String, nil] path the log file
    # @return [String] the location in effect
    def set_location(path)
      return @logger_location if path.nil? || path.to_s.empty?
      @logger_location = path
    end

    # Sets the level of every cached logger and of loggers created later.
    # nil and '' are ignored silently; an unknown level is reported on
    # stderr and leaves the current level in place.
    #
    # @param [String, nil] level one of LEVELS
    def set_level(level)
      return if level.nil? || level.empty?

      unless LEVELS.include?(level)
        STDERR.puts "Unable to set logger level to #{level}, possible values are #{LEVELS}. Keeping '#{@logger_level}'."
        return
      end

      STDERR.puts "Logging level has been set to '#{level}' for output to #{@logger_location}"
      # @loggers maps names to loggers, so iterate over the values only
      @loggers.each_value {|l| l.level = severity_for(level)}
      @logger_level = level
    end

    private

    # Translates a level name such as 'warn' into Logger::WARN.
    def severity_for(level)
      Logger.const_get(level.upcase)
    end
  end
end
|
data/lib/vcs2json/version.rb
CHANGED
data/lib/vcs2json_helper.rb
CHANGED
@@ -4,5 +4,8 @@ require 'json/pure'
|
|
4
4
|
require 'time'
|
5
5
|
require 'csv'
|
6
6
|
require 'chronic'
|
7
|
+
require 'logger' # leveled logging
|
8
|
+
require 'nokogiri' # better/faster xml library
|
9
|
+
require 'open3' # make system calls and capture stdout/stderr/exitcodes easily
|
7
10
|
require 'require_all'
|
8
11
|
require_rel '/**/*.rb'
|
data/vcs2json.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vcs2json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Rolfsnes
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-09-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: nokogiri
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
111
125
|
description:
|
112
126
|
email:
|
113
127
|
- mail@thomasrolfsnes.com
|
@@ -136,7 +150,10 @@ files:
|
|
136
150
|
- lib/exceptions/no_date_field.rb
|
137
151
|
- lib/exceptions/no_file_section.rb
|
138
152
|
- lib/exceptions/no_time_data_in_chafiles_field.rb
|
153
|
+
- lib/exceptions/unsupported_language.rb
|
154
|
+
- lib/srcML/srcml.rb
|
139
155
|
- lib/vcs2json/git.rb
|
156
|
+
- lib/vcs2json/logger.rb
|
140
157
|
- lib/vcs2json/version.rb
|
141
158
|
- lib/vcs2json_helper.rb
|
142
159
|
- vcs2json.gemspec
|