git-commits-analyzer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/analyze_commits +51 -0
- data/lib/git-commits-analyzer/utils.rb +54 -0
- data/lib/git-commits-analyzer.rb +198 -0
- metadata +47 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e1c430c515c28a8a1a8f6dea0098f0590101095f
|
4
|
+
data.tar.gz: 01df65d27d647d9dff0207927d9d550a93111b0f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 21b04677942df99acdef4d4d44cfc907fdeee7907a121c695e616c093d98e99af87ab242aa036009be2644fb629b05d61ebaaaf3c024c4f6fc8cdd5925ffa614
|
7
|
+
data.tar.gz: a647cccacdec435bd1d73f444b76157f363f671e6da468917fb7e637c8dc090e3c9c9fa7c7e4c5612afbc48e0d985119c5b5f609e2269a786a2f564446be07bd
|
data/bin/analyze_commits
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Gems.
|
4
|
+
require 'logger'
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
# Internal classes.
|
8
|
+
require 'git-commits-analyzer'
|
9
|
+
require 'git-commits-analyzer/utils'
|
10
|
+
|
11
|
+
|
12
|
+
##### CONFIGURATION #####

# Logger writing to STDOUT, limited to WARN severity and above; it is handed
# to the analyzer below.
logger = Logger.new(STDOUT)
logger.level = Logger::WARN


##### MAIN #####

# Read the options given on the command line.
options = Utils.parse_command_line_options

# Collect the git repositories located under the requested path.
repos = Utils.get_git_repos(path: options[:path])
puts "Found #{repos.length} repos to inspect."
puts ""

# Feed every repository, in sorted order, to a single analyzer instance so
# that the statistics accumulate across repositories.
puts "===== Inspecting repos ====="
puts ""
analyzer = GitCommitsAnalyzer.new(logger: logger, author: options[:authors])
repos.sort.each do |repo|
  puts "Inspecting repo #{repo}"
  analyzer.parse_repo(repo: repo)
end
puts ""

# Print the overall commit count as a sanity check, and stop here when no
# month has any recorded commit (nothing to save).
authors_list = options[:authors].join(', ')
puts "Found #{analyzer.total_commits} commits for author(s) #{authors_list}"
puts ""
exit if analyzer.monthly_commits.keys.empty?

# Write the collected data, serialized as JSON, to the output file.
puts "===== Save data ====="
puts ""
output_file = options[:output]
File.open(output_file, 'w') do |file|
  file.write(analyzer.to_json)
end
puts "Re-generated #{output_file}."
puts ""
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
# Public: Command line and filesystem helpers used by the analyze_commits
# executable.
class Utils
  # Public: Parse and validate the command line options.
  #
  # Options are read from ARGV, which is consumed by OptionParser#parse!.
  #
  # Returns a Hash with the keys :path (String), :authors (Array of Strings)
  # and :output (String).
  # Raises OptionParser::MissingArgument if --author, --output or --path was
  # not provided.
  def self.parse_command_line_options()
    options = { authors: [] }

    OptionParser.new do |opts|
      opts.banner = "Usage: analyze_commits [options]"

      # Parse path.
      opts.on("-p", "--path PATH", "Specify a path to search for git repositories under") do |path|
        options[:path] = path
      end

      # Parse authors. The flag may be repeated to include several authors.
      opts.on("-a", "--author EMAIL", "Include this author in statistics") do |email|
        options[:authors] << email
      end

      # Parse output file.
      # Note: the short flag must differ from the one used by --path,
      # otherwise this registration takes over "-p" and --path is only
      # reachable through its long form.
      opts.on("-o", "--output PATH", "Specify a path to output files with collected data") do |output|
        options[:output] = output
      end

      # Show usage.
      opts.on_tail("-h", "--help", "Show this message") do
        puts opts
        exit
      end
    end.parse!

    # Check mandatory options.
    raise OptionParser::MissingArgument, '--author' if options[:authors].empty?
    raise OptionParser::MissingArgument, '--output' if options[:output].nil?
    raise OptionParser::MissingArgument, '--path' if options[:path].nil?

    options
  end

  # Public: List the git repositories located directly under a directory.
  #
  # path - The directory whose immediate children should be inspected.
  #
  # An entry is considered a repository when it contains a .git subdirectory
  # (shortcut to identify repos); plain files and other directories are
  # skipped. Only one level is inspected, there is no recursion.
  #
  # Returns an Array of paths (Strings), each made of path joined with the
  # entry name.
  def self.get_git_repos(path:)
    Dir.glob(File.join(path, '*')).select do |dir|
      File.directory?(dir) && File.directory?(File.join(dir, '.git'))
    end
  end
end
|
@@ -0,0 +1,198 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'git'
|
3
|
+
require 'git_diff_parser'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
# Public: parse git logs for language and commit metadata.
|
7
|
+
#
|
8
|
+
# Examples:
|
9
|
+
#
|
10
|
+
# git_parser = GitCommitsAnalyzer.new(logger: logger, author: author)
|
11
|
+
#
|
12
|
+
class GitCommitsAnalyzer
  # Public: Returns a hash of commit counts keyed by "YYYY-MM" strings.
  # Months without any recorded commit read as 0.
  attr_reader :monthly_commits

  # Public: Returns the total number of commits belonging to the author(s)
  # specified.
  attr_reader :total_commits

  # Public: Returns the number of lines added/removed broken down by language,
  # as a hash of the form { language => { 'added' => n, 'deleted' => n } }.
  attr_reader :lines_by_language

  # Public: Initialize new GitCommitsAnalyzer object.
  #
  # logger - A logger object to display git errors/warnings.
  # author - The email (String) or list of emails (Array of Strings) of the
  #          git author(s) for whom we should compile the metadata.
  #
  def initialize(logger:, author:)
    @logger = logger
    # Normalize to an Array so that the membership test in parse_repo is an
    # exact email match; include? on a bare String would match substrings.
    @author = Array(author)
    @monthly_commits = Hash.new(0)
    @total_commits = 0
    @lines_by_language = {}
  end

  # Public: Determine the type of a file at the given revision of a repo.
  #
  # filename - The name of the file to analyze.
  # sha      - The commit ID.
  # git_repo - A git repo object corresponding to the underlying repo; only
  #            its show(sha, filename) method is used here.
  #
  # Returns a string corresponding to the language of the file. For files that
  # are not recognized, this is the extension including its leading dot, or
  # the filename itself when it has no extension. Returns nil when the file
  # should be ignored (LICENSE, empty or unreadable content, *.lock files).
  #
  def self.determine_language(filename:, sha:, git_repo:)
    return nil if filename == 'LICENSE'

    # First try to match on known extensions.
    case filename
    when /\.(pl|pm|t|cgi|pod|run)$/i
      return 'Perl'
    when /\.rb$/
      return 'Ruby'
    when /\.md$/
      return 'Markdown'
    when /\.json$/
      return 'JSON'
    when /\.(yml|yaml)$/
      return 'YAML'
    when /\.?(perlcriticrc|githooksrc|ini|editorconfig|gitconfig)$/
      return 'INI'
    when /\.css$/
      return 'CSS'
    when /\.(tt2|html)$/
      return 'HTML'
    when /\.sql$/
      return 'SQL'
    when /\.py$/
      return 'Python'
    when /\.js$/
      return 'JavaScript'
    when /\.c$/
      return 'C'
    when /\.sh$/
      return 'bash'
    when /(bash|bash_\w+)$/
      return 'bash'
    when /\.?(SKIP|gitignore|txt|csv|vim|gitmodules|gitattributes|jshintrc|gperf|vimrc|psqlrc|inputrc|screenrc)$/
      return 'Text'
    when /^(README|MANIFEST|Changes|Gemfile|Gemfile.lock)$/
      return 'Text'
    end

    # Next, retrieve the file content and infer from that. A failure to read
    # the file is reported and then treated like empty content.
    content = nil
    begin
      content = git_repo.show(sha, filename)
    rescue StandardError => e
      pp e.to_s
    end
    return nil if content.nil? || content == ''

    # Only the first line is inspected, e.g. a shebang ending in "perl".
    first_line = content.split(/\n/)[0] || ''
    return 'Perl' if first_line =~ /perl$/

    # Fall back on the extension in last resort.
    extension = /\.([^\.]+)$/.match(filename)
    return filename if extension.nil?
    # Compare the captured group: extension[0] is the whole match and still
    # carries the leading dot, so it can never be equal to 'lock'.
    return nil if extension[1] == 'lock'
    return extension[0]
  end

  # Public: Parse the git logs for a repo.
  #
  # repo - The location of the git repository, as accepted by Git.open.
  #
  # This method adds the metadata extracted for this repo to the instance
  # variables collecting commit metadata.
  #
  def parse_repo(repo:)
    git_repo = Git.open(repo, :log => @logger)

    # Note: override the default of 30 for count(), nil gives the whole git log
    # history.
    git_repo.log(nil).each do |commit|
      # Only include the authors specified on the command line.
      next unless @author.include?(commit.author.email)

      # Parse diff and analyze patches to detect language.
      diff = commit.diff_parent.to_s
      diff.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')

      GitDiffParser.parse(diff).each do |patch|
        body = patch.instance_variable_get :@body
        language = self.class.determine_language(filename: patch.file, sha: commit.sha, git_repo: git_repo)
        next if language.nil?

        @lines_by_language[language] ||= { 'added' => 0, 'deleted' => 0 }
        count_changed_lines(language: language, body: body)
      end

      # Add to stats for monthly commit count.
      # Note: months are zero-padded to allow easy sorting, even if it's more
      # work for formatting later on.
      @monthly_commits[commit.date.strftime("%Y-%m")] += 1

      # Add to stats for total commits count.
      @total_commits += 1
    end
  end

  # Public: Get a range of months from the earliest commit to the latest.
  #
  # Returns an array of [year, month] integer pairs in chronological order,
  # including the months without commits between both ends. The array is
  # empty when no commit has been recorded yet.
  #
  def get_month_scale()
    return [] if @monthly_commits.empty?

    months = @monthly_commits.keys.sort
    start_year, start_month = months.first.split('-').map { |x| x.to_i }
    end_year, end_month = months.last.split('-').map { |x| x.to_i }

    month_scale = []
    start_year.upto(end_year) do |year|
      1.upto(12) do |month|
        # Trim the months before the first commit and after the last one.
        next if year == start_year && month < start_month
        next if year == end_year && month > end_month
        month_scale << [year, month]
      end
    end

    return month_scale
  end

  # Public: Generate a JSON representation of the parsed data.
  #
  # Any argument is accepted and ignored, which keeps the method compatible
  # with the to_json(state) calls made by the JSON generators when this object
  # is nested inside another structure.
  #
  # Returns: a JSON string.
  #
  def to_json(*_args)
    month_names = Date::ABBR_MONTHNAMES
    formatted_monthly_commits = get_month_scale.map do |year, month|
      {
        :month => month_names[month] + '-' + year.to_s,
        # Counts are emitted as strings in the generated document.
        :commits => @monthly_commits[sprintf('%s-%02d', year, month)].to_s
      }
    end

    return JSON.pretty_generate(
      {
        :monthly_commits => formatted_monthly_commits,
        :total_commits => @total_commits,
        :lines_by_language => @lines_by_language,
      }
    )
  end

  private

  # Internal: Add the lines added/deleted in a patch body to the counters of a
  # language. The counters for that language must already exist.
  #
  # language - The key to update in @lines_by_language.
  # body     - The text of the patch, one diff line per line.
  #
  def count_changed_lines(language:, body:)
    counters = @lines_by_language[language]
    body.split(/\n/).each do |line|
      # Skip lines that are neither additions nor deletions.
      next unless line =~ /^[+-]/
      # Skip changes made only of whitespace.
      next if line =~ /^[+-]\s+$/

      if line.start_with?('+')
        counters['added'] += 1
      else
        counters['deleted'] += 1
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: git-commits-analyzer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Guillaume Aubert
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-03-12 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Parse git repos and collect commit statistics/data for a given author.
|
14
|
+
email: aubertg@cpan.org
|
15
|
+
executables:
|
16
|
+
- analyze_commits
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- bin/analyze_commits
|
21
|
+
- lib/git-commits-analyzer.rb
|
22
|
+
- lib/git-commits-analyzer/utils.rb
|
23
|
+
homepage: http://rubygems.org/gems/
|
24
|
+
licenses:
|
25
|
+
- GPLv3
|
26
|
+
metadata: {}
|
27
|
+
post_install_message:
|
28
|
+
rdoc_options: []
|
29
|
+
require_paths:
|
30
|
+
- lib
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
requirements: []
|
42
|
+
rubyforge_project:
|
43
|
+
rubygems_version: 2.4.6
|
44
|
+
signing_key:
|
45
|
+
specification_version: 4
|
46
|
+
summary: Analyze git commits
|
47
|
+
test_files: []
|