git-commits-analyzer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e1c430c515c28a8a1a8f6dea0098f0590101095f
4
+ data.tar.gz: 01df65d27d647d9dff0207927d9d550a93111b0f
5
+ SHA512:
6
+ metadata.gz: 21b04677942df99acdef4d4d44cfc907fdeee7907a121c695e616c093d98e99af87ab242aa036009be2644fb629b05d61ebaaaf3c024c4f6fc8cdd5925ffa614
7
+ data.tar.gz: a647cccacdec435bd1d73f444b76157f363f671e6da468917fb7e637c8dc090e3c9c9fa7c7e4c5612afbc48e0d985119c5b5f609e2269a786a2f564446be07bd
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Gems.
4
+ require 'logger'
5
+ require 'pp'
6
+
7
+ # Internal classes.
8
+ require 'git-commits-analyzer'
9
+ require 'git-commits-analyzer/utils'
10
+
11
+
12
##### CONFIGURATION #####

# Only warnings and errors coming from git operations are shown on STDOUT.
logger = Logger.new(STDOUT)
logger.level = Logger::WARN


##### MAIN #####

# Read the path, author(s) and output file from the command line.
options = Utils.parse_command_line_options
authors = options[:authors]

# List the git repos located directly under the given path.
repos = Utils.get_git_repos(path: options[:path])
puts "Found #{repos.length} repos to inspect."
puts ""

# Walk through the repos in sorted order and accumulate commit statistics.
puts "===== Inspecting repos ====="
puts ""
git_commits_analyzer = GitCommitsAnalyzer.new(logger: logger, author: authors)
repos.sort.each do |repo|
  puts "Inspecting repo #{repo}"
  git_commits_analyzer.parse_repo(repo: repo)
end
puts ""

# Sanity check: report how many commits matched, and stop if there is nothing
# to save.
puts "Found #{git_commits_analyzer.total_commits} commits for author(s) #{authors.join(', ')}"
puts ""
exit if git_commits_analyzer.monthly_commits.empty?

# Write the collected data as JSON to the output file.
puts "===== Save data ====="
puts ""
output_file = options[:output]
File.write(output_file, git_commits_analyzer.to_json)
puts "Re-generated #{output_file}."
puts ""
@@ -0,0 +1,54 @@
1
+ require 'optparse'
2
+
3
# Public: Command line and filesystem helpers for the commit analyzer.
class Utils
  # Public: Parse the command line options.
  #
  # argv - The Array of command line arguments to parse (default: ARGV).
  #        Recognized switches are removed from it by OptionParser#parse!.
  #
  # Returns a Hash with the keys :path (String), :output (String) and
  # :authors (Array of Strings).
  # Raises OptionParser::MissingArgument if --author, --output or --path was
  # not provided.
  def self.parse_command_line_options(argv = ARGV)
    options = { authors: [] }

    parser = OptionParser.new do |opts|
      opts.banner = "Usage: analyze_commits [options]"

      # Parse path.
      opts.on("-p", "--path PATH", "Specify a path to search for git repositories under") do |path|
        options[:path] = path
      end

      # Parse authors. The switch can be repeated to include several authors.
      opts.on("-a", "--author EMAIL", "Include this author in statistics") do |email|
        options[:authors] << email
      end

      # Parse output file. Uses -o: -p is already taken by --path, and
      # declaring the same short switch twice makes the two options collide.
      opts.on("-o", "--output PATH", "Specify a path to output files with collected data") do |output|
        options[:output] = output
      end

      # Show usage.
      opts.on_tail("-h", "--help", "Show this message") do
        puts opts
        exit
      end
    end
    parser.parse!(argv)

    # Check mandatory options.
    raise OptionParser::MissingArgument, '--author' if options[:authors].empty?
    raise OptionParser::MissingArgument, '--output' if options[:output].nil?
    raise OptionParser::MissingArgument, '--path' if options[:path].nil?

    options
  end

  # Public: Find the git repositories located directly under a directory.
  #
  # path - The String path of the directory to search (not recursive).
  #
  # Returns an Array of String paths, one per sub-directory that contains a
  # .git directory.
  def self.get_git_repos(path:)
    Dir.glob(File.join(path, '*')).select do |dir|
      # Keep only directories that have a .git subdirectory (shortcut to
      # identify repos); plain files and other directories are skipped.
      File.directory?(dir) && File.directory?(File.join(dir, '.git'))
    end
  end
end
@@ -0,0 +1,198 @@
1
+ require 'date'
2
+ require 'git'
3
+ require 'git_diff_parser'
4
+ require 'json'
5
+
6
+ # Public: parse git logs for language and commit metadata.
7
+ #
8
+ # Examples:
9
+ #
10
+ # git_parser = GitCommitsAnalyzer.new(logger: logger, author: author)
11
+ #
12
class GitCommitsAnalyzer
  # Public: Returns a hash of commit numbers broken down by month, keyed by
  # "YYYY-MM" strings. Missing months default to 0.
  attr_reader :monthly_commits

  # Public: Returns the total number of commits belonging to the author(s)
  # specified.
  attr_reader :total_commits

  # Public: Returns the number of lines added/removed broken down by language.
  attr_reader :lines_by_language

  # Public: Initialize new GitCommitsAnalyzer object.
  #
  # logger - A logger object to display git errors/warnings.
  # author - The email (String) or list of emails (Array of Strings) of the
  #          git author(s) for whom we should compile the metadata.
  #
  def initialize(logger:, author:)
    @logger = logger
    # Normalize to an Array so that a single email is compared exactly
    # against commit authors instead of being substring-matched.
    @author = Array(author)
    @monthly_commits = {}
    @monthly_commits.default = 0
    @total_commits = 0
    @lines_by_language = {}
  end

  # Public: Determine the type of a file at the given revision of a repo.
  #
  # filename - The name of the file to analyze.
  # sha      - The commit ID.
  # git_repo - A git repo object corresponding to the underlying repo. Only
  #            its show(sha, filename) method is used, and only when the
  #            filename does not match a known pattern.
  #
  # Returns a string corresponding to the language of the file, or nil if the
  # file should be ignored (license, lock files, empty/unreadable content).
  #
  def self.determine_language(filename:, sha:, git_repo:)
    return nil if filename == 'LICENSE'

    # First try to match on known extensions.
    case filename
    when /\.(pl|pm|t|cgi|pod|run)$/i
      return 'Perl'
    when /\.rb$/
      return 'Ruby'
    when /\.md$/
      return 'Markdown'
    when /\.json$/
      return 'JSON'
    when /\.(yml|yaml)$/
      return 'YAML'
    when /\.?(perlcriticrc|githooksrc|ini|editorconfig|gitconfig)$/
      return 'INI'
    when /\.css$/
      return 'CSS'
    when /\.(tt2|html)$/
      return 'HTML'
    when /\.sql$/
      return 'SQL'
    when /\.py$/
      return 'Python'
    when /\.js$/
      return 'JavaScript'
    when /\.c$/
      return 'C'
    when /\.sh$/
      return 'bash'
    when /(bash|bash_\w+)$/
      return 'bash'
    when /\.?(SKIP|gitignore|txt|csv|vim|gitmodules|gitattributes|jshintrc|gperf|vimrc|psqlrc|inputrc|screenrc)$/
      return 'Text'
    when /^(README|MANIFEST|Changes|Gemfile|Gemfile.lock)$/
      return 'Text'
    end

    # Next, retrieve the file content and infer from that. Failures are
    # reported but not fatal: the file is simply ignored.
    content = nil
    begin
      content = git_repo.show(sha, filename)
    rescue StandardError => e
      warn e.to_s
    end
    return nil if content.nil? || content == ''

    # A first line ending in "perl" (e.g. a shebang) identifies a Perl script.
    first_line = content.split(/\n/).first || ''
    return 'Perl' if first_line =~ /perl$/

    # Fall back on the extension in last resort. The capture group (index 1)
    # holds the extension without the leading dot; index 0 would be the whole
    # match including the dot.
    extension = /\.([^\.]+)$/.match(filename)
    return filename if extension.nil?
    return nil if extension[1] == 'lock'
    extension[1]
  end

  # Public: Parse the git logs for a repo.
  #
  # repo - The path of the git repo to open and analyze.
  #
  # This method adds the metadata extracted for this repo to the instance
  # variables collecting commit metadata.
  #
  def parse_repo(repo:)
    git_repo = Git.open(repo, :log => @logger)

    # Note: override the default of 30 for count(), nil gives the whole git log
    # history.
    git_repo.log(nil).each do |commit|
      # Only include the authors specified on the command line.
      next unless @author.include?(commit.author.email)

      # Parse diff and analyze patches to detect language.
      diff = commit.diff_parent.to_s
      diff.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')

      GitDiffParser.parse(diff).each do |patch|
        body = patch.instance_variable_get :@body
        language = self.class.determine_language(filename: patch.file, sha: commit.sha, git_repo: git_repo)
        next if language.nil?

        counters = (@lines_by_language[language] ||= { 'added' => 0, 'deleted' => 0 })

        body.split(/\n/).each do |line|
          # Skip added/removed lines that only contain whitespace.
          next if line =~ /^[+-]\s+$/

          if line =~ /^\+/
            counters['added'] += 1
          elsif line =~ /^-/
            counters['deleted'] += 1
          end
        end
      end

      # Add to stats for monthly commit count.
      # Note: months are zero-padded to allow easy sorting, even if it's more
      # work for formatting later on.
      @monthly_commits[commit.date.strftime("%Y-%m")] += 1

      # Add to stats for total commits count.
      @total_commits += 1
    end
  end

  # Public: Get a range of months from the earliest commit to the latest.
  #
  # Returns an array of [year, month] integer pairs covering every month from
  # the earliest to the latest month with commits (inclusive), or an empty
  # array if no commits have been collected.
  #
  def get_month_scale()
    months = @monthly_commits.keys.sort
    return [] if months.empty?

    first_year, first_month = months.first.split('-').map { |x| x.to_i }
    last_year, last_month = months.last.split('-').map { |x| x.to_i }

    month_scale = []
    first_year.upto(last_year) do |year|
      1.upto(12) do |month|
        # Trim the months before the first commit and after the last one.
        next if year == first_year && month < first_month
        next if year == last_year && month > last_month
        month_scale << [year, month]
      end
    end

    month_scale
  end

  # Public: Generate a JSON representation of the parsed data.
  #
  # _args - Ignored. Accepted so that JSON generators, which call
  #         to_json(state) on nested objects, can serialize this object.
  #
  # Returns: a JSON string.
  #
  def to_json(*_args)
    month_names = Date::ABBR_MONTHNAMES
    formatted_monthly_commits = get_month_scale.map do |year, month|
      data_key = sprintf('%s-%02d', year, month)
      {
        :month => "#{month_names[month]}-#{year}",
        # Counts are emitted as strings, e.g. "3".
        :commits => @monthly_commits[data_key].to_s
      }
    end

    JSON.pretty_generate(
      {
        :monthly_commits => formatted_monthly_commits,
        :total_commits => @total_commits,
        :lines_by_language => @lines_by_language,
      }
    )
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: git-commits-analyzer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Guillaume Aubert
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-03-12 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Parse git repos and collect commit statistics/data for a given author.
14
+ email: aubertg@cpan.org
15
+ executables:
16
+ - analyze_commits
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/analyze_commits
21
+ - lib/git-commits-analyzer.rb
22
+ - lib/git-commits-analyzer/utils.rb
23
+ homepage: http://rubygems.org/gems/
24
+ licenses:
25
+ - GPLv3
26
+ metadata: {}
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubyforge_project:
43
+ rubygems_version: 2.4.6
44
+ signing_key:
45
+ specification_version: 4
46
+ summary: Analyze git commits
47
+ test_files: []