vcs2json 1.0.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/cli/main.rb +28 -9
- data/lib/exceptions/unsupported_language.rb +4 -0
- data/lib/srcML/srcml.rb +262 -0
- data/lib/vcs2json/git.rb +217 -205
- data/lib/vcs2json/logger.rb +43 -0
- data/lib/vcs2json/version.rb +1 -1
- data/lib/vcs2json_helper.rb +3 -0
- data/vcs2json.gemspec +1 -0
- metadata +19 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3dfd8c30d50ef1760dd38b36725e30be0b09d9f
|
4
|
+
data.tar.gz: 842566ac9e598784693c76cc60c8a3531ca8b218
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74e7b098604e6437ef3df2a51af00e013c84c790b47502352d2c1bb8c251892c71d22fb65035a6dbd1217e4d27499dd8abd5d2c50f4668d0fe0dfed407ff4725
|
7
|
+
data.tar.gz: d53aa615168906592464caa112cf55ac0ff8f36a0a7274c3b98317e2c898588142034564f683d0f7a0670b53c78aa947eecbe5d7ea44d8a820702a758c3204de
|
data/.gitignore
CHANGED
data/lib/cli/main.rb
CHANGED
@@ -4,20 +4,39 @@ module Vcs2JsonCLI
|
|
4
4
|
class Main < Thor
  map %w[--version -v] => :__print_version

  # Logging configuration shared by every command.
  class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to log"
  class_option :logger_location, type: :string, desc: "Which file to print logs to"

  desc "--version, -v", "print the version"
  def __print_version
    puts Vcs2Json::VERSION
  end

  class << self
    # Thor exits with status 0 on errors unless this hook returns true;
    # returning true makes failures exit with status 1 instead.
    def exit_on_failure?
      true
    end
  end

  # Options controlling which commits and files end up in the dump.
  class_option :ignore, type: :string, desc: "Specify location of .evocignore file"
  class_option :case_id, type: :string, desc: "Specify case identifier. Used by .evocignore etc"
  class_option :issue, aliases: '-i', type: :boolean, default: false, desc: "Attempt to extract issue ids from commit messages"
  class_option :after, aliases: '-a', desc: "Only include commits after this date"
  class_option :before, aliases: '-b', desc: "Only include commits before this date"
  class_option :number, aliases: '-n', type: :numeric, default: 10000, desc: "The number of commits to dump"

  # Options controlling the fine grained (method level) change detection.
  class_option :fine_grained, type: :boolean, default: true, desc: "Include fine grained change information in output"
  class_option :ignore_comments, type: :boolean, default: false, desc: "Ignore comments when calculating diffs. Only in effect for fine grained changes."
  class_option :ignore_whitespace, type: :boolean, default: false, desc: "Ignore whitespace when calculating diffs. Only in effect for fine grained changes."
  class_option :residuals, type: :boolean, default: true, desc: "Consider changes that happen outside of methods"

  desc "git [options]", "Make a dump of the change-history of system using git, output on stdout"
  def git
    dumper = Vcs2Json::Git.new(options)
    dumper.parse
  end

  # git is the command that runs when none is named on the command line
  default_task :git
end
|
23
42
|
end
|
data/lib/srcML/srcml.rb
ADDED
@@ -0,0 +1,262 @@
|
|
1
|
+
# Wrapper for the srcML commandline interface,
|
2
|
+
# with functions specifically directed at extracting method names and calculating diffs between files.
|
3
|
+
|
4
|
+
module SrcML
  extend Logging

  # Hash of supported languages.
  #
  # The file extension must map to a known format for srcML
  LANGUAGES = {
    '.java' => 'Java',
    '.C'    => 'C++',
    '.cc'   => 'C++',
    '.cpp'  => 'C++',
    '.CPP'  => 'C++',
    '.c++'  => 'C++',
    '.cp'   => 'C++',
    '.c'    => 'C'
  }.freeze

  # Check that srcML is available.
  # The version banner is sent to stderr so it cannot end up in data written to stdout,
  # and a missing binary terminates with a non-zero status so callers can detect the failure.
  if system('srcml', '--version', out: :err)
    SRCML = "srcml"
  else
    $stderr.puts "SrcML is required, please install from www.srcml.com"
    exit 1
  end

  ##
  # PUBLIC INTERFACE
  ##

  ###########
  # OPTIONS #
  ###########

  # Whether to remove comments from the source
  @@ignore_comments = false
  # Whether to remove whitespace from the source
  @@ignore_whitespace = false
  # Whether to qualify files with their full path or just their basename
  # i.e., /lib/file.a or just file.a
  @@basename_qualify = false
  # Whether to consider changes that happen outside of methods
  @@residuals = false

  # @param [Boolean] bool whether comments are stripped before hashing
  # @raise [ArgumentError] if bool is not true or false
  def self.ignore_comments=(bool)
    @@ignore_comments = ensure_boolean(bool)
  end

  def self.ignore_comments?
    @@ignore_comments
  end

  # @param [Boolean] bool whether whitespace is stripped before hashing
  # @raise [ArgumentError] if bool is not true or false
  def self.ignore_whitespace=(bool)
    @@ignore_whitespace = ensure_boolean(bool)
  end

  def self.ignore_whitespace?
    @@ignore_whitespace
  end

  # @param [Boolean] bool whether result keys use File.basename(path) instead of the full path
  # @raise [ArgumentError] if bool is not true or false
  def self.basename_qualify=(bool)
    @@basename_qualify = ensure_boolean(bool)
  end

  def self.basename_qualify?
    @@basename_qualify
  end

  # @param [Boolean] bool whether code outside of functions gets its own '@residuals' entry
  # @raise [ArgumentError] if bool is not true or false
  def self.residuals=(bool)
    @@residuals = ensure_boolean(bool)
  end

  def self.residuals?
    @@residuals
  end

  ###########
  # METHODS #
  ###########

  ##
  # Calculates the AST of the given file
  #
  # @param [String] path the path to the file
  # @param [String] revision if specified, retrieves the file from the given revision
  #
  # @return [Nokogiri::XML::Document] an AST representation
  # @raise [SrcML::UnsupportedLanguageError] if a revision is given and the file extension is not in LANGUAGES
  # @raise [ArgumentError] if the srcml command exits unsuccessfully (message is its stderr)
  def self.ast(path, revision: false)
    source_xml =
      if revision
        language = LANGUAGES[File.extname(path)]
        unless language
          raise SrcML::UnsupportedLanguageError, "Language in the file '#{path}' not supported (guessed language from file type)"
        end
        # bash is called explicitly to get support for process substitution.
        # The language and the revision:path pair are handed over as positional
        # parameters ($1, $2) so that spaces or quotes in a path cannot alter the command.
        run_command('bash', '-c', SRCML + ' --language "$1" <(git show "$2")', 'bash', language, "#{revision}:#{path}")
      else
        # argument-vector form: no shell involved, the path is passed verbatim
        run_command(SRCML, path)
      end

    # turn into structured xml
    xml = Nokogiri::XML(source_xml)
    strip_comments(xml) if ignore_comments?
    strip_whitespace(xml) if ignore_whitespace?
    xml
  end

  ##
  # Returns the methods of the given file
  #
  # If the method has any parameters, the parameter types are also returned with the method name
  #
  # @param [String] path the path to the file
  # @param [String] revision if specified, retrieves the file from the given revision
  # @return [Hash[method_name => method_hash]] a hash storing the methods as keys and the hashed method as values
  def self.methods(path, revision: false)
    tree = ast(path, revision: revision)
    qualified_file = basename_qualify? ? File.basename(path) : path
    # hash each method and store in hash map with function name as key
    hashes = {}

    # split file based on class declarations; without classes the full ast is the only partition
    partitions = tree.search("class")
    partitions = [tree] if partitions.empty?

    partitions.each do |partition|
      # if partitioned into classes, attempt to extract class name
      class_name = ''
      if !partition.document? && (class_node = partition.at_css("/name"))
        class_name = class_node.text
      else
        logger.debug "(#{qualified_file}) Found partitioned file but could not find classname for this partition at location \\name'. Context:\n###\n#{partition}\n###"
      end

      partition.search("function").each do |function|
        name = function.at_css("/name")
        unless name
          # report the function node itself as context
          logger.debug "(#{qualified_file}) Could not identify function name at location '\\name'. Context:\n###\n#{function}\n###"
          next
        end

        parameters = parameter_types(function, name, qualified_file)
        method_name = parameters.empty? ? name.text : name.text+"("+parameters.join(',')+")"
        fully_qualified_name = class_name.empty? ? [qualified_file,method_name].join(':') : [qualified_file,class_name,method_name].join(':')

        if (block = function.at_css("block"))
          hashes[fully_qualified_name] = block.content.hash
        else
          logger.debug "(#{qualified_file}) No <block> (i.e. the function content) in the function xml. Function: #{name}."
        end
      end
    end

    if residuals?
      # add residuals entry
      # i.e., what's left of the code when all methods are removed
      tree.search("function").remove
      hashes[qualified_file+':'+'@residuals'] = tree.content.hash
    end

    hashes
  end

  ##
  # Given two Hashes, returns all the keys that either have different values in the two hashes
  # or are not in both hashes.
  #
  # Keys of +new+ that are added or modified come first (in +new+'s order),
  # followed by the keys that only exist in +old+.
  #
  # @param [Hash] old
  # @param [Hash] new
  # @return [Array<String>]
  def self.different_entries(old, new)
    added_or_modified = new.reject { |key, value| old.key?(key) && old[key] == value }.keys
    added_or_modified + (old.keys - new.keys)
  end

  ##
  # Calculate the changed methods between two files on disk
  #
  # @param [String] old the path to the old file
  # @param [String] new the path to the new file
  # @return [Array<String>] the changed methods
  def self.changed_methods(old, new)
    different_entries(methods(old), methods(new))
  end

  ##
  # Like #changed_methods but retrieves the file from a git revision
  # and compares it against the same path in the parent revision (revision~1)
  #
  # @param [String] path the path to the file
  # @param [String] revision the revision to retrieve the file from
  # @return [Array<String>] the changed methods
  def self.changed_methods_git(path, revision)
    methods_new = methods(path, revision: revision)
    methods_old = methods(path, revision: "#{revision}~1")
    different_entries(methods_old, methods_new)
  end

  ###################
  # PRIVATE HELPERS #
  ###################

  # Validates an option value, returning it unchanged when it is true or false.
  def self.ensure_boolean(value)
    raise ArgumentError, "Must be boolean" unless [true, false].include?(value)
    value
  end

  # Runs an external command and returns its stdout; stderr becomes the error message on failure.
  def self.run_command(*command)
    stdout, stderr, status = Open3.capture3(*command)
    raise ArgumentError, stderr unless status.success?
    stdout
  end

  # Removes every comment node from the document (in place).
  def self.strip_comments(xml)
    xml.search('comment').each do |comment|
      # trailing newline + any number of spaces are removed from the previous node
      # this gives a more intuitive behaviour
      # i.e., the newline + spaces before the comment is considered "part of" the comment
      if (previous_node = comment.previous_sibling)
        previous_node.content = previous_node.content.gsub(/\n\s*/, "")
      end
      comment.remove
    end
  end

  # Removes whitespace-only text nodes and squeezes whitespace out of the remaining ones (in place).
  def self.strip_whitespace(xml)
    xml.search("text()").each do |node|
      if node.content =~ /\S/
        node.content = node.content.gsub(/[[:space:]]+/, "")
      else
        node.remove
      end
    end
  end

  # Extracts one type (or, failing that, name) string per parameter of the given function node.
  def self.parameter_types(function, name, qualified_file)
    parameter_list = function.at_css("/parameter_list")
    unless parameter_list
      logger.debug "(#{qualified_file}) Parameter list not found for Function: #{name}. Searched for structure '/parameter_list'. Context:\n###\n#{function}\n###"
      return []
    end

    parameter_list.search("parameter").each_with_object([]) do |parameter_node, types|
      # try the most specific structure first and fall back to progressively looser ones
      parameter = parameter_node.at_css("decl type name name") ||
                  parameter_node.at_css("decl type name") ||
                  parameter_node.at_css("decl type") ||
                  parameter_node.at_css("type") ||
                  parameter_node.at_css("name")
      if parameter
        types << parameter.text
      else
        logger.debug "(#{qualified_file}) Function: #{name}. Nested structures: (decl type name name) or (decl type name) or (decl type) not found in parameter xml, ignoring this parameter. Context:\n###\n#{parameter_node}\n###"
      end
    end
  end

  private_class_method :ensure_boolean, :run_command, :strip_comments, :strip_whitespace, :parameter_types
end
|
262
|
+
|
data/lib/vcs2json/git.rb
CHANGED
@@ -1,230 +1,242 @@
|
|
1
1
|
require_relative '../vcs2json_helper'
|
2
2
|
|
3
3
|
module Vcs2Json
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
4
|
+
# Dumps the change history of the git repository in the current working directory
# as one JSON object per commit on $stdout.
class Git
  include Logging

  attr_accessor :number, :fine_grained, :case_id
  attr_reader :ignore

  # Generate separator between the fields of a commit
  FIELD_SEP = Digest::SHA256.hexdigest(Time.new.to_s + "field_sep")
  # git pretty-format: sha, author name/email/date, committer name/email/date, raw message
  META_DATA = %w[%H %an %ae %ad %cn %ce %cd %B].join(FIELD_SEP)

  # @param [Hash] opts the command line options; the keys read here are
  #   :case_id, :ignore, :before, :after, :number, :fine_grained,
  #   :logger_location, :logger_level, :ignore_comments, :ignore_whitespace, :residuals
  def initialize(opts)
    # the case id has to be known before #ignore= runs, since the ignore list is looked up by it
    self.case_id      = opts[:case_id]
    self.ignore       = opts[:ignore]
    self.before       = opts[:before]
    self.after        = opts[:after]
    self.number       = opts[:number]
    self.fine_grained = opts[:fine_grained]

    # Set logger level
    Logging.set_location(opts[:logger_location])
    Logging.set_level(opts[:logger_level])
    SrcML.ignore_comments   = opts[:ignore_comments]
    SrcML.ignore_whitespace = opts[:ignore_whitespace]
    SrcML.residuals         = opts[:residuals]
  end

  # Stores the lower date bound; a value Date.parse rejects is reported on stderr and dropped.
  # A nil value leaves the current setting untouched.
  def after=(after)
    @after = validated_date(after, 'after') unless after.nil?
  end

  # @return [String] the --after option for git, or '' when no bound is set
  def after
    @after.nil? ? '' : "--after=\"#{@after}\""
  end

  # Stores the upper date bound; a value Date.parse rejects is reported on stderr and dropped.
  # A nil value leaves the current setting untouched.
  def before=(before)
    @before = validated_date(before, 'before') unless before.nil?
  end

  # @return [String] the --before option for git, or '' when no bound is set
  def before
    @before.nil? ? '' : "--before=\"#{@before}\""
  end

  # Walks the history from HEAD and prints every commit with at least one detected change
  # as a JSON line on $stdout, until #number commits have been printed.
  # Progress and diagnostics go to stderr.
  def parse
    # keeps track of number of commits successfully parsed
    commit_counter = 0
    # keeps track of empty commits
    empty_commits = []

    # getting the list of revision ids is cheap, so we get some extra in case we are
    # unable to parse the required amount in the first 'n' commits
    commit_ids = `git rev-list HEAD #{before} #{after} -n #{number * 10} --no-merges`.split

    commit_ids.each do |id|
      logger.debug "Parsing commit: #{id}"

      changed_files = changed_files_in(id)
      if changed_files.empty? # no (non-ignored) files in commit
        empty_commits << id
        next
      end

      output_hash = commit_metadata(id)
      output_hash[:changes] = collect_changes(id, changed_files, commit_counter)

      # Only add commits where at least one change was detected
      if output_hash[:changes].empty?
        empty_commits << id
        next
      end

      $stdout.puts output_hash.to_json
      commit_counter += 1
      break if commit_counter == number
    end

    # we may still lack commits after exhaustive search, notify user
    if commit_counter < number
      STDERR.puts "Asked for #{number} commits, only found #{commit_counter} non-empty commits in the last #{commit_ids.size} commits"
    end
    # print ids of empty commits to stderr
    unless empty_commits.empty?
      STDERR.puts "EMPTY COMMITS"
      STDERR.puts empty_commits
    end
  end

  # Loads the list of files to ignore for the current #case_id.
  #
  # The first existing file among the given path, ./.evocignore and ~/.evocignore is used.
  #
  # @param [String, nil] path explicit location of the ignore file
  # @return [Array] the entries to ignore (empty when nothing applies)
  def ignore=(path)
    # File.exist? does not expand '~', so the home location is expanded up front
    default_locations = ["#{Dir.pwd}/.evocignore", File.expand_path('~/.evocignore')]
    paths = path.nil? ? default_locations : [path] + default_locations
    ignore_path = paths.find { |candidate| File.exist?(candidate) }
    entries = nil

    if ignore_path.nil?
      STDERR.puts ".evocignore not found. Tried #{paths}. All files will be used."
    else
      STDERR.puts "Loading files to ignore from #{ignore_path}"
      if case_id.nil?
        STDERR.puts "Id in .evocignore not specified, not ignoring any files."
      else
        # load_file opens and closes the file itself
        ignore_file = YAML.load_file(ignore_path)
        # an empty or non-mapping file is treated like a file without the requested id
        if ignore_file.is_a?(Hash) && ignore_file.key?(case_id)
          entries = ignore_file[case_id]
          STDERR.puts "Ignoring #{entries.size} files" unless entries.nil?
        else
          STDERR.puts "The id: '#{case_id}' not found in #{ignore_path}"
        end
      end
    end

    @ignore = entries.nil? ? [] : entries
  end

  private

  # Returns the date string unchanged if Date.parse accepts it, otherwise warns and returns nil.
  def validated_date(value, option)
    Date.parse(value)
    value
  rescue
    STDERR.puts "Invalid date --#{option}=#{value}. Ignoring option."
    nil
  end

  # Returns the non-empty "status<TAB>path" lines of the commit, minus ignored files.
  def changed_files_in(id)
    `git log --pretty=format:'' --name-status #{id} -n 1`
      .split("\n")
      .reject(&:empty?)
      # the ignore list holds file names, so compare against the path column, not the whole line
      .reject { |line| ignore.include?(line.split("\t")[1]) }
  end

  # Fetches the metadata of one commit and returns the output hash (with an empty :changes list).
  def commit_metadata(id)
    raw_commit = `git log --pretty=format:'#{META_DATA}' #{id} -n 1`
    fields = to_utf8(raw_commit).split(FIELD_SEP)
    {
      sha:             fields[0].delete("\n"), # remove astray newlines
      name:            fields[1],
      email:           fields[2],
      date:            Time.parse(fields[3]),
      committer_name:  fields[4],
      committer_email: fields[5],
      committer_date:  Time.parse(fields[6]),
      message:         fields[7],
      changes:         []
    }
  end

  # Re-encodes raw git output as UTF-8, dropping every byte that cannot be converted.
  #
  # @raise [EncodingError] if the conversion itself fails
  def to_utf8(raw)
    text = raw.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
    # need to explicitly check if the encoding is valid for ruby <= 2.0
    # utf8 -> utf8 will not do anything even with invalid bytes
    # http://stackoverflow.com/questions/24036821/ruby-2-0-0-stringmatch-argumenterror-invalid-byte-sequence-in-utf-8
    unless text.valid_encoding?
      # encode to utf16 first and then back to utf8
      text.encode!("UTF-16be", invalid: :replace, undef: :replace, replace: '')
      text.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
    end
    text
  rescue ArgumentError
    raise EncodingError, "Unable to encode input as UTF-8"
  end

  # Collects the changed entities (methods or whole files) of all files in one commit.
  def collect_changes(id, changed_files, commit_counter)
    changes = []
    changed_files.each_with_index do |line, index|
      # print progress
      STDERR.print "Parsing file #{index+1} of #{changed_files.size} in commit #{commit_counter+1} of #{number} \r"
      # NOTE(review): lines with more than two columns keep only the first path — confirm that is intended
      status, file_name = line.split("\t")
      changes.concat(changes_for(id, status, file_name))
    end
    changes
  end

  # Returns the changed methods of one file, or the file name itself when fine grained
  # analysis is switched off or the language is not supported.
  def changes_for(id, status, file_name)
    return [file_name] unless fine_grained

    if status == 'A'
      # new file, all methods are new, no need to calculate diff
      SrcML.methods(file_name, revision: id).keys
    else
      SrcML.changed_methods_git(file_name, id)
    end
  rescue SrcML::UnsupportedLanguageError
    [file_name]
  end
end
|
230
242
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# enable logging in classes through 'include Logging'
|
2
|
+
module Logging
  # Returns the logger of the including class, created on first use.
  # Loggers are looked up by self.class.name, so all instances of a class share one.
  #
  # @return [Logger]
  def logger
    @logger ||= Logging.logger_for(self.class.name)
  end

  # Use a hash class-ivar to cache a unique Logger per class:
  @loggers = {}
  @logger_level = 'debug'
  @logger_location = 'vcs2json.log'

  class << self
    # @param [String] classname the name the logger is cached under
    # @return [Logger] the cached logger for classname, created if missing
    def logger_for(classname)
      @loggers[classname] ||= configure_logger_for(classname)
    end

    # Builds a daily-rotating logger that writes to the current location at the current level.
    #
    # @param [String] classname used as the program name of the logger
    # @return [Logger]
    def configure_logger_for(classname)
      logger = Logger.new(@logger_location, 'daily')
      logger.progname = classname
      logger.level = severity(@logger_level)
      logger
    end

    # Sets where loggers created from now on write to.
    # Loggers that already exist keep the destination they were created with.
    def set_location(path)
      @logger_location = path
    end

    # Sets the severity threshold of all existing and future loggers.
    # nil and '' are ignored; an unknown level is reported on stderr and changes nothing.
    #
    # @param [String, nil] level one of debug, info, warn, error, fatal
    def set_level(level)
      return if level.nil? || level.empty?

      possible_levels = %w[debug info warn error fatal]
      if possible_levels.include?(level)
        STDERR.puts "Logging level has been set to '#{level}' for output to #{@logger_location}"
        # @loggers is a Hash of name => Logger, so only the values are loggers
        @loggers.each_value { |existing| existing.level = severity(level) }
        @logger_level = level
      else
        STDERR.puts "Unable to set logger level to #{level}, possible values are #{possible_levels}. Keeping '#{@logger_level}'."
      end
    end

    private

    # Maps a level name such as 'warn' to the matching Logger severity constant.
    def severity(name)
      Logger.const_get(name.upcase)
    end
  end
end
|
data/lib/vcs2json/version.rb
CHANGED
data/lib/vcs2json_helper.rb
CHANGED
@@ -4,5 +4,8 @@ require 'json/pure'
|
|
4
4
|
require 'time'
|
5
5
|
require 'csv'
|
6
6
|
require 'chronic'
|
7
|
+
require 'logger' # leveled logging
|
8
|
+
require 'nokogiri' # better/faster xml library
|
9
|
+
require 'open3' # make system calls and capture stdout/stderr/exitcodes easily
|
7
10
|
require 'require_all'
|
8
11
|
require_rel '/**/*.rb'
|
data/vcs2json.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vcs2json
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Thomas Rolfsnes
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-09-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: nokogiri
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
111
125
|
description:
|
112
126
|
email:
|
113
127
|
- mail@thomasrolfsnes.com
|
@@ -136,7 +150,10 @@ files:
|
|
136
150
|
- lib/exceptions/no_date_field.rb
|
137
151
|
- lib/exceptions/no_file_section.rb
|
138
152
|
- lib/exceptions/no_time_data_in_chafiles_field.rb
|
153
|
+
- lib/exceptions/unsupported_language.rb
|
154
|
+
- lib/srcML/srcml.rb
|
139
155
|
- lib/vcs2json/git.rb
|
156
|
+
- lib/vcs2json/logger.rb
|
140
157
|
- lib/vcs2json/version.rb
|
141
158
|
- lib/vcs2json_helper.rb
|
142
159
|
- vcs2json.gemspec
|