git-commits-analyzer 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/analyze_commits +51 -0
- data/lib/git-commits-analyzer/utils.rb +54 -0
- data/lib/git-commits-analyzer.rb +198 -0
- metadata +47 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e1c430c515c28a8a1a8f6dea0098f0590101095f
|
4
|
+
data.tar.gz: 01df65d27d647d9dff0207927d9d550a93111b0f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 21b04677942df99acdef4d4d44cfc907fdeee7907a121c695e616c093d98e99af87ab242aa036009be2644fb629b05d61ebaaaf3c024c4f6fc8cdd5925ffa614
|
7
|
+
data.tar.gz: a647cccacdec435bd1d73f444b76157f363f671e6da468917fb7e637c8dc090e3c9c9fa7c7e4c5612afbc48e0d985119c5b5f609e2269a786a2f564446be07bd
|
data/bin/analyze_commits
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Standard library.
require 'logger'
require 'pp'

# Classes shipped with this gem.
require 'git-commits-analyzer'
require 'git-commits-analyzer/utils'


##### CONFIGURATION #####

# Log to standard output, reporting warnings and above only.
logger = Logger.new(STDOUT)
logger.level = Logger::WARN


##### MAIN #####

# Read the options given on the command line.
options = Utils.parse_command_line_options

# Collect the git repositories found under the requested path.
repos = Utils.get_git_repos(path: options[:path])
puts "Found #{repos.length} repos to inspect."
puts ""

# Feed every repository, in sorted order, to a single analyzer so that the
# statistics accumulate across repositories.
puts "===== Inspecting repos ====="
puts ""
git_commits_analyzer = GitCommitsAnalyzer.new(logger: logger, author: options[:authors])
repos.sort.each do |repo|
  puts "Inspecting repo #{repo}"
  git_commits_analyzer.parse_repo(repo: repo)
end
puts ""

# Sanity check: report the number of matching commits, and stop here when
# there is nothing to save.
puts "Found #{git_commits_analyzer.total_commits} commits for author(s) #{options[:authors].join(', ')}"
puts ""
exit if git_commits_analyzer.monthly_commits.empty?

# Write the collected statistics, serialized as JSON, to the output file.
puts "===== Save data ====="
puts ""
output_file = options[:output]
File.write(output_file, git_commits_analyzer.to_json)
puts "Re-generated #{output_file}."
puts ""
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
# Public: Command line and filesystem helpers used by the analyze_commits
# executable.
class Utils
  # Public: Parse and validate the command line options.
  #
  # args - The Array of command line arguments to parse. It is consumed in
  #        place by OptionParser#parse! (default: ARGV).
  #
  # Returns a Hash with the keys :path (String), :authors (Array of Strings)
  #   and :output (String).
  # Raises OptionParser::MissingArgument if --author, --output or --path was
  #   not supplied.
  def self.parse_command_line_options(args = ARGV)
    options = { authors: [] }

    parser = OptionParser.new do |opts|
      opts.banner = "Usage: analyze_commits [options]"

      # Parse path.
      opts.on("-p", "--path PATH", "Specify a path to search for git repositories under") do |path|
        options[:path] = path
      end

      # Parse authors. The option can be repeated to include several authors.
      opts.on("-a", "--author EMAIL", "Include this author in statistics") do |email|
        options[:authors] << email
      end

      # Parse output file. The short flag must differ from the one used by
      # --path, otherwise the two options shadow each other.
      opts.on("-o", "--output PATH", "Specify a path to output files with collected data") do |output|
        options[:output] = output
      end

      # Show usage.
      opts.on_tail("-h", "--help", "Show this message") do
        puts opts
        exit
      end
    end
    parser.parse!(args)

    # Check mandatory options.
    raise OptionParser::MissingArgument, '--author' if options[:authors].empty?
    raise OptionParser::MissingArgument, '--output' if options[:output].nil?
    raise OptionParser::MissingArgument, '--path' if options[:path].nil?

    options
  end

  # Public: List the git repositories located directly under a directory.
  #
  # path - The String path of the directory to search. Only its immediate
  #        children are inspected.
  #
  # Returns an Array of String paths, one per child directory that contains
  #   a .git subdirectory (shortcut used to identify repos).
  def self.get_git_repos(path:)
    Dir.glob(File.join(path, '*')).select do |dir|
      File.directory?(dir) && File.directory?(File.join(dir, '.git'))
    end
  end
end
|
@@ -0,0 +1,198 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'git'
|
3
|
+
require 'git_diff_parser'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
# Public: parse git logs for language and commit metadata.
#
# Examples:
#
#   git_parser = GitCommitsAnalyzer.new(logger: logger, author: author)
#
class GitCommitsAnalyzer
  # Internal: Ordered list of [pattern, language] pairs used to recognize a
  # file from its name. The first matching pattern wins, so order matters.
  KNOWN_FILE_TYPES = [
    [/\.(pl|pm|t|cgi|pod|run)$/i, 'Perl'],
    [/\.rb$/, 'Ruby'],
    [/\.md$/, 'Markdown'],
    [/\.json$/, 'JSON'],
    [/\.(yml|yaml)$/, 'YAML'],
    [/\.?(perlcriticrc|githooksrc|ini|editorconfig|gitconfig)$/, 'INI'],
    [/\.css$/, 'CSS'],
    [/\.(tt2|html)$/, 'HTML'],
    [/\.sql$/, 'SQL'],
    [/\.py$/, 'Python'],
    [/\.js$/, 'JavaScript'],
    [/\.c$/, 'C'],
    [/\.sh$/, 'bash'],
    [/(bash|bash_\w+)$/, 'bash'],
    [/\.?(SKIP|gitignore|txt|csv|vim|gitmodules|gitattributes|jshintrc|gperf|vimrc|psqlrc|inputrc|screenrc)$/, 'Text'],
    [/^(README|MANIFEST|Changes|Gemfile|Gemfile\.lock)$/, 'Text']
  ].freeze

  # Public: Returns a hash of commit numbers broken down by month.
  attr_reader :monthly_commits

  # Public: Returns the total number of commits belonging to the author
  # specified.
  attr_reader :total_commits

  # Public: Returns the number of lines added/removed broken down by language.
  attr_reader :lines_by_language

  # Public: Initialize new GitCommitsAnalyzer object.
  #
  # logger - A logger object to display git errors/warnings.
  # author - The email of the git author for whom we should compile the
  #          metadata, or an Array of such emails. A single String is wrapped
  #          in an Array so that emails are compared whole rather than by
  #          substring.
  #
  def initialize(logger:, author:)
    @logger = logger
    @author = Array(author)
    # Months without any commit read as 0.
    @monthly_commits = Hash.new(0)
    @total_commits = 0
    @lines_by_language = {}
  end

  # Public: Determine the type of a file at the given revision of a repo.
  #
  # filename - The name of the file to analyze.
  # sha      - The commit ID.
  # git_repo - A git repo object corresponding to the underlying repo. It is
  #            only used (through #show) when the file name is not enough to
  #            identify the language.
  #
  # Returns a string corresponding to the language of the file, or nil when
  #   the file should be ignored (LICENSE, *.lock, empty or unreadable
  #   content).
  #
  def self.determine_language(filename:, sha:, git_repo:)
    return nil if filename == 'LICENSE'

    # First try to match on known extensions.
    _pattern, language = KNOWN_FILE_TYPES.find { |pattern, _| pattern =~ filename }
    return language if language

    # Next, retrieve the file content and infer from that.
    begin
      content = git_repo.show(sha, filename)
    rescue StandardError => e
      # Best effort: report the problem and ignore the file.
      warn e.message
    end
    return nil if content.nil? || content == ''

    first_line = content.split(/\n/).first || ''
    return 'Perl' if first_line =~ /perl$/

    # Fall back on the extension in last resort. Files without any extension
    # are reported under their own name.
    extension = /\.([^\.]+)$/.match(filename)
    return filename if extension.nil?
    # Use the captured group (without the leading dot), not the whole match.
    return nil if extension[1] == 'lock'

    extension[1]
  end

  # Public: Parse the git logs for a repo.
  #
  # repo - The path of the git repo to open and analyze.
  #
  # This method adds the metadata extracted for this repo to the instance
  # variables collecting commit metadata.
  #
  def parse_repo(repo:)
    git_repo = Git.open(repo, :log => @logger)

    # Note: override the default of 30 for count(), nil gives the whole git log
    # history.
    git_repo.log(nil).each do |commit|
      # Only include the authors specified on the command line.
      next unless @author.include?(commit.author.email)

      record_line_changes(commit: commit, git_repo: git_repo)

      # Add to stats for monthly commit count.
      # Note: months are zero-padded to allow easy sorting, even if it's more
      # work for formatting later on.
      @monthly_commits[commit.date.strftime("%Y-%m")] += 1

      # Add to stats for total commits count.
      @total_commits += 1
    end
  end

  # Public: Get a range of months from the earliest commit to the latest.
  #
  # Returns an array of [year, month] integer pairs in chronological order,
  #   or an empty array when no commit has been recorded.
  #
  def get_month_scale()
    months = @monthly_commits.keys.sort
    return [] if months.empty?

    first_year, first_month = months.first.split('-').map { |x| x.to_i }
    last_year, last_month = months.last.split('-').map { |x| x.to_i }

    month_scale = []
    first_year.upto(last_year) do |year|
      1.upto(12) do |month|
        next if year == first_year && month < first_month
        next if year == last_year && month > last_month

        month_scale << [year, month]
      end
    end

    month_scale
  end

  # Public: Generate a JSON representation of the parsed data.
  #
  # _args - Ignored. Accepted so that the method stays compatible with
  #         callers that pass a generator state to #to_json.
  #
  # Returns: a JSON string with the keys monthly_commits (one entry per month
  #   of the month scale, with the commit count as a string), total_commits
  #   and lines_by_language.
  #
  def to_json(*_args)
    formatted_monthly_commits = get_month_scale.map do |year, month|
      {
        :month => "#{Date::ABBR_MONTHNAMES[month]}-#{year}",
        :commits => @monthly_commits[format('%s-%02d', year, month)].to_s
      }
    end

    JSON.pretty_generate(
      {
        :monthly_commits => formatted_monthly_commits,
        :total_commits => @total_commits,
        :lines_by_language => @lines_by_language,
      }
    )
  end

  private

  # Internal: Add the lines added/deleted by a commit to the per-language
  # counters.
  #
  # commit   - The commit object whose diff against its parent is analyzed.
  # git_repo - The git repo object the commit belongs to.
  #
  def record_line_changes(commit:, git_repo:)
    # Parse diff and analyze patches to detect language.
    diff = commit.diff_parent.to_s
    diff.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')

    GitDiffParser.parse(diff).each do |patch|
      # NOTE(review): reads the patch body through its instance variable;
      # confirm whether the parser exposes a public reader for it.
      body = patch.instance_variable_get :@body
      language = self.class.determine_language(filename: patch.file, sha: commit.sha, git_repo: git_repo)
      next if language.nil?

      @lines_by_language[language] ||= { 'added' => 0, 'deleted' => 0 }

      body.split(/\n/).each do |line|
        # Skip changed lines made only of whitespace.
        next if line =~ /^[+-]\s+$/

        if line =~ /^\+/
          @lines_by_language[language]['added'] += 1
        elsif line =~ /^\-/
          @lines_by_language[language]['deleted'] += 1
        end
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: git-commits-analyzer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Guillaume Aubert
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-03-12 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Parse git repos and collect commit statistics/data for a given author.
|
14
|
+
email: aubertg@cpan.org
|
15
|
+
executables:
|
16
|
+
- analyze_commits
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- bin/analyze_commits
|
21
|
+
- lib/git-commits-analyzer.rb
|
22
|
+
- lib/git-commits-analyzer/utils.rb
|
23
|
+
homepage: http://rubygems.org/gems/
|
24
|
+
licenses:
|
25
|
+
- GPLv3
|
26
|
+
metadata: {}
|
27
|
+
post_install_message:
|
28
|
+
rdoc_options: []
|
29
|
+
require_paths:
|
30
|
+
- lib
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
requirements: []
|
42
|
+
rubyforge_project:
|
43
|
+
rubygems_version: 2.4.6
|
44
|
+
signing_key:
|
45
|
+
specification_version: 4
|
46
|
+
summary: Analyze git commits
|
47
|
+
test_files: []
|