git_statistics 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ac2c8d9e4ff4611dfa6c6b2f9f664fb874a7201f
4
+ data.tar.gz: 9da9f644f174806900bd8b2a2921906bfc3e3989
5
+ SHA512:
6
+ metadata.gz: 381f1532eee3178c8a1459ec2f5ca37f1429786c56e4bae54c6139e2e9ed4fea6253fc4960d6b426e360e777c6adad1c48bffac53b1ce4fe9f93b5de27f5e51f
7
+ data.tar.gz: b7bb4f77e58b6f1bf73cfd09219651c0256a59a8a9b3de25a50c11948063a66c3df88856b14210709a6353f1fd0bf97d5eeaafea95f3d36d0a7587bf4d5fd7cf
data/bin/git-statistics CHANGED
@@ -3,4 +3,4 @@
3
3
  $:.unshift File.expand_path("../../lib", __FILE__)
4
4
  require 'git_statistics'
5
5
 
6
- GitStatistics::GitStatistics.new.execute
6
+ GitStatistics::CLI.new(Dir.pwd).execute
data/bin/git_statistics CHANGED
@@ -3,4 +3,4 @@
3
3
  $:.unshift File.expand_path("../../lib", __FILE__)
4
4
  require 'git_statistics'
5
5
 
6
- GitStatistics::GitStatistics.new.execute
6
+ GitStatistics::CLI.new(Dir.pwd).execute
@@ -1,11 +1,16 @@
1
- require 'ostruct'
2
- require 'optparse'
3
1
  require 'git_statistics/initialize'
4
2
 
5
3
  module GitStatistics
6
- class GitStatistics
7
- attr_reader :options
8
- def initialize
4
+ class CLI
5
+ attr_reader :repository, :options
6
+
7
+ DEFAULT_BRANCH = "master"
8
+
9
+ def initialize(dir)
10
+ repository_location = dir.nil? ? Rugged::Repository.discover(Dir.pwd) : Rugged::Repository.discover(dir)
11
+ @repository = Rugged::Repository.new(repository_location)
12
+ @collected = false
13
+ @collector = nil
9
14
  @options = OpenStruct.new(
10
15
  email: false,
11
16
  merges: false,
@@ -13,7 +18,7 @@ module GitStatistics
13
18
  update: false,
14
19
  sort: "commits",
15
20
  top: 0,
16
- branch: false,
21
+ branch: DEFAULT_BRANCH,
17
22
  verbose: false,
18
23
  debug: false,
19
24
  limit: 100
@@ -22,43 +27,43 @@ module GitStatistics
22
27
  end
23
28
 
24
29
  def execute
25
- if options.debug
26
- Log.level = Logger::DEBUG
27
- Log.use_debug
28
- elsif options.verbose
29
- Log.level = Logger::INFO
30
- end
30
+ determine_log_level
31
+ collect_and_only_update
32
+ fresh_collect! unless @collected
33
+ calculate!
34
+ output_results
35
+ end
31
36
 
32
- # Collect data (incremental or fresh) based on presence of old data
37
+ def collect_and_only_update
33
38
  if options.update
34
39
  # Ensure commit directory is present
35
- collector = Collector.new(options.limit, false, options.pretty)
36
- commits_directory = File.join(collector.repo_path, ".git_statistics")
40
+ @collector = Collector.new(repository, options.limit, false, options.pretty)
41
+ commits_directory = repository.workdir + ".git_statistics/"
37
42
  FileUtils.mkdir_p(commits_directory)
38
43
  file_count = Utilities.number_of_matching_files(commits_directory, /\d+\.json/) - 1
39
44
 
40
45
  if file_count >= 0
41
- time = Utilities.get_modified_time(commits_directory + "#{file_count}.json")
42
- # Only use --since if there is data present
43
- collector.collect(options.branch, "--since=\"#{time}\"")
44
- collected = true
46
+ time_since = Utilities.get_modified_time(commits_directory + "#{file_count}.json").to_s
47
+ @collector.collect({:branch => options.branch, :time_since => time_since})
48
+ @collected = true
45
49
  end
46
50
  end
51
+ end
47
52
 
48
- # If no data was collected as there was no present data then start fresh
49
- unless collected
50
- collector = Collector.new(options.limit, true, options.pretty)
51
- collector.collect(options.branch)
52
- end
53
-
54
- # Calculate statistics
55
- collector.commits.calculate_statistics(options.email, options.merges)
53
+ def calculate!
54
+ @collector.commits.calculate_statistics(options.email, options.merges)
55
+ end
56
56
 
57
- # Print results
58
- results = Formatters::Console.new(collector.commits)
57
+ def output_results
58
+ results = Formatters::Console.new(@collector.commits)
59
59
  puts results.print_summary(options.sort, options.email, options.top)
60
60
  end
61
61
 
62
+ def fresh_collect!
63
+ @collector = Collector.new(repository, options.limit, true, options.pretty)
64
+ @collector.collect({:branch => options.branch})
65
+ end
66
+
62
67
  def parse_options
63
68
  OptionParser.new do |opt|
64
69
  opt.version = VERSION
@@ -80,8 +85,8 @@ module GitStatistics
80
85
  opt.on "-t", "--top N", Float,"Show the top N authors in results" do |value|
81
86
  options.top = value
82
87
  end
83
- opt.on "-b", "--branch", "Use current branch for statistics (otherwise all branches)" do
84
- options.branch = true
88
+ opt.on "-b", "--branch BRANCH", "Use the specified branch for statistics (otherwise the master branch is used)" do |branch|
89
+ options.branch = branch
85
90
  end
86
91
  opt.on "-v", "--verbose", "Verbose output (shows INFO level log statements)" do
87
92
  options.verbose = true
@@ -94,6 +99,17 @@ module GitStatistics
94
99
  end
95
100
  end.parse!
96
101
  end
97
- end
98
102
 
103
+ private
104
+
105
+ def determine_log_level
106
+ if options.debug
107
+ Log.level = Logger::DEBUG
108
+ Log.use_debug
109
+ elsif options.verbose
110
+ Log.level = Logger::INFO
111
+ end
112
+ end
113
+
114
+ end
99
115
  end
@@ -1,217 +1,70 @@
1
1
  module GitStatistics
2
2
  class Collector
3
3
 
4
- attr_accessor :repo, :repo_path, :commits_path, :commits
4
+ attr_accessor :repo, :commits_path, :commits
5
5
 
6
- def initialize(limit, fresh, pretty)
7
- @repo = Utilities.get_repository
8
- @repo_path = File.expand_path("..", @repo.path)
9
- @commits_path = File.join(@repo_path, ".git_statistics")
6
+ def initialize(repo, limit, fresh, pretty)
7
+ @repo = repo
8
+ @commits_path = repo.workdir + ".git_statistics"
10
9
  @commits = Commits.new(@commits_path, fresh, limit, pretty)
11
10
  end
12
11
 
13
- def collect(branch, time_since = "", time_until = "")
14
- # Collect branches to use for git log
15
- branches = branch ? [] : Branches.all
12
+ def collect(options = {})
13
+ branch = options[:branch] ? options[:branch] : CLI::DEFAULT_BRANCH
14
+ branch_head = Rugged::Branch.lookup(repo, branch).tip
16
15
 
17
- # Create pipe for the git log to acquire commits
18
- pipe = Pipe.new("git --no-pager log #{branches.join(' ')} --date=iso --reverse"\
19
- " --no-color --find-copies-harder --numstat --encoding=utf-8"\
20
- " --summary #{time_since} #{time_until}"\
21
- " --format=\"%H,%an,%ae,%ad,%p\"")
16
+ walker = Rugged::Walker.new(repo)
17
+ walker.push(branch_head)
22
18
 
23
- # Use a buffer approach to queue up lines from the log for each commit
24
- buffer = []
25
- pipe.each do |line|
26
-
27
- # Extract the buffer (commit) when we match ','x5 in the log format (delimeter)
28
- if line.split(',').size == 5
29
-
30
- # Sometimes 'git log' doesn't populate the buffer (i.e., merges), try fallback option if so
31
- buffer = fall_back_collect_commit(buffer[0].split(',').first) if buffer.one?
32
-
33
- extract_commit(buffer) unless buffer.empty?
34
- buffer = []
35
-
36
- # Save commits to file if size exceeds limit or forced
19
+ walker.each_with_index do |commit, count|
20
+ if valid_commit?(commit, options)
21
+ extract_commit(commit, count + 1)
37
22
  @commits.flush_commits
38
23
  end
39
-
40
- buffer << line
41
24
  end
42
25
 
43
- # Extract the last commit
44
- extract_commit(buffer) unless buffer.empty?
45
26
  @commits.flush_commits(true)
46
27
  end
47
28
 
48
- def fall_back_collect_commit(sha)
49
- # Create pipe for the git log to acquire commits
50
- pipe = Pipe.new("git --no-pager show #{sha} --date=iso --reverse"\
51
- " --no-color --find-copies-harder --numstat --encoding=utf-8 "\
52
- "--summary --format=\"%H,%an,%ae,%ad,%p\"")
53
-
54
- # Check that the buffer has valid information (i.e., sha was valid)
55
- if !pipe.empty? && pipe.first.split(',').first == sha
56
- pipe.to_a
57
- else
58
- []
29
+ def valid_commit?(commit, options)
30
+ if !options[:time_since].nil?
31
+ return false unless commit.author[:time] > DateTime.parse(options[:time_since].to_s).to_time
59
32
  end
60
- end
61
-
62
- def acquire_commit_data(line)
63
- # Split up formated line
64
- commit_info = line.split(',')
65
33
 
66
- # Initialize commit data
67
- data = (@commits[commit_info[0]] ||= Hash.new(0))
68
- data[:author] = commit_info[1]
69
- data[:author_email] = commit_info[2]
70
- data[:time] = commit_info[3]
71
- data[:files] = []
72
-
73
- # Flag commit as merge if necessary (determined if two parents)
74
- if commit_info[4].nil? || commit_info[4].split(' ').one?
75
- data[:merge] = false
76
- else
77
- data[:merge] = true
34
+ if !options[:time_until].nil?
35
+ return false unless commit.author[:time] < DateTime.parse(options[:time_until].to_s).to_time
78
36
  end
79
37
 
80
- return {:sha => commit_info[0], :data => data}
38
+ return true
81
39
  end
82
40
 
83
- def extract_commit(buffer)
84
- # Acquire general commit information
85
- commit_data = acquire_commit_data(buffer[0])
86
-
87
- Log.info "Extracting #{commit_data[:sha]}"
88
-
89
- # Abort if the commit sha extracted form the buffer is invalid
90
- if commit_data[:sha].scan(/[\d|a-f]{40}/)[0].nil?
91
- Log.warn "Invalid buffer containing commit information"
92
- return
93
- end
94
-
95
- # Identify all changed files for this commit
96
- files = identify_changed_files(buffer[2..-1])
97
-
98
- # No files were changed in this commit, abort commit
99
- if files.nil?
100
- Log.debug "No files were changed"
101
- return
102
- end
103
-
104
- # Acquire blob for each changed file and process it
105
- files.each do |file|
106
- blob = get_blob(commit_data[:sha], file)
107
-
108
- # Only process blobs, or log the submodules and problematic files
109
- if blob.instance_of?(Grit::Blob)
110
- process_blob(commit_data[:data], blob, file)
111
- elsif blob.instance_of?(Grit::Submodule)
112
- Log.debug "Ignoring submodule #{blob.name}"
113
- else
114
- Log.warn "Problem processing file #{file[:file]}"
115
- end
116
- end
117
- return commit_data[:data]
118
- end
119
-
120
- def get_blob(sha, file)
121
- # Split up file for Grit navigation
122
- file = file[:file].split(File::Separator)
123
-
124
- # Acquire blob of the file for this specific commit
125
- blob = Utilities.find_blob_in_tree(@repo.tree(sha), file)
126
-
127
- # If we cannot find blob in current commit (deleted file), check previous commit
128
- if blob.nil? || blob.instance_of?(Grit::Tree)
129
- prev_commit = @repo.commits(sha).first.parents[0]
130
- return nil if prev_commit.nil?
41
+ def acquire_commit_meta(commit_summary)
42
+ # Initialize commit data
43
+ data = (@commits[commit_summary.oid] ||= Hash.new(0))
44
+
45
+ data[:author] = commit_summary.author[:name]
46
+ data[:author_email] = commit_summary.author[:email]
47
+ data[:time] = commit_summary.author[:time].to_s
48
+ data[:merge] = commit_summary.merge?
49
+ data[:additions] = commit_summary.additions
50
+ data[:deletions] = commit_summary.deletions
51
+ data[:net] = commit_summary.net
52
+ data[:added_files] = commit_summary.added_files
53
+ data[:deleted_files] = commit_summary.deleted_files
54
+ data[:modified_files] = commit_summary.modified_files
55
+ data[:files] = commit_summary.file_stats.map{ |file| file.to_json }
131
56
 
132
- prev_tree = @repo.tree(prev_commit.id)
133
- blob = Utilities.find_blob_in_tree(prev_tree, file)
134
- end
135
- return blob
57
+ return data
136
58
  end
137
59
 
138
- def identify_changed_files(buffer)
139
- return buffer if buffer.nil?
140
-
141
- # For each modification extract the details
142
- changed_files = []
143
- buffer.each do |line|
144
- extracted_line = CommitLineExtractor.new(line)
60
+ def extract_commit(commit, count)
61
+ Log.info "Extracting(#{count}) #{commit.oid}"
62
+ commit_summary = CommitSummary.new(@repo, commit)
145
63
 
146
- # Extract changed file information if it exists
147
- changed_file_information = extracted_line.changed
148
- if changed_file_information.any?
149
- changed_files << changed_file_information
150
- next # This line is processed, skip to next
151
- end
64
+ # Acquire meta information about commit
65
+ commit_data = acquire_commit_meta(commit_summary)
152
66
 
153
- # Extract details of create/delete files if it exists
154
- created_or_deleted = extracted_line.created_or_deleted
155
- if created_or_deleted.any?
156
- augmented = false
157
- # Augment changed file with create/delete information if possible
158
- changed_files.each do |file|
159
- if file[:file] == created_or_deleted[:file]
160
- file[:status] = created_or_deleted[:status]
161
- augmented = true
162
- break
163
- end
164
- end
165
- changed_files << created_or_deleted unless augmented
166
- next # This line is processed, skip to next
167
- end
168
-
169
- # Extract details of rename/copy files if it exists
170
- renamed_or_copied = extracted_line.renamed_or_copied
171
- if renamed_or_copied.any?
172
- augmented = false
173
- # Augment changed file with rename/copy information if possible
174
- changed_files.each do |file|
175
- if file[:file] == renamed_or_copied[:new_file]
176
- file[:status] = renamed_or_copied[:status]
177
- file[:old_file] = renamed_or_copied[:old_file]
178
- file[:similar] = renamed_or_copied[:similar]
179
- augmented = true
180
- break
181
- end
182
- end
183
- changed_files << renamed_or_copied unless augmented
184
- next # This line is processed, skip to next
185
- end
186
- end
187
-
188
- changed_files
189
- end
190
-
191
- def process_blob(data, blob, file)
192
- # Initialize a hash to hold information regarding the file
193
- file_hash = Hash.new(0)
194
- file_hash[:name] = file[:file]
195
- file_hash[:additions] = file[:additions]
196
- file_hash[:deletions] = file[:deletions]
197
- file_hash[:status] = file[:status]
198
-
199
- # Add file information to commit itself
200
- data[file[:status].to_sym] += 1 if file[:status] != nil
201
- data[:additions] += file[:additions]
202
- data[:deletions] += file[:deletions]
203
-
204
- # Acquire specifics on blob
205
- file_hash[:binary] = blob.binary?
206
- file_hash[:image] = blob.image?
207
- file_hash[:vendored] = blob.vendored?
208
- file_hash[:generated] = blob.generated?
209
-
210
- # Identify the language of the blob if possible
211
- file_hash[:language] = blob.language.nil? ? "Unknown" : blob.language.name
212
- data[:files] << file_hash
213
-
214
- return data
67
+ return commit_data
215
68
  end
216
69
 
217
70
  end
@@ -0,0 +1,86 @@
1
+ module GitStatistics
2
+ class CommitSummary < SimpleDelegator
3
+ def initialize(repo, commit)
4
+ super(commit)
5
+ @repo = repo
6
+ @diff = diff(commit.parents.first)
7
+ @patches = @diff.patches
8
+ end
9
+
10
+ # A Git commit is a merge if it has more than one parent
11
+ def merge?
12
+ parents.size > 1
13
+ end
14
+
15
+ # How many files were removed in this commit
16
+ def deleted_files
17
+ file_stats.select { |file| file.status == :deleted }.count
18
+ end
19
+
20
+ # How many files were added in this commit
21
+ def added_files
22
+ file_stats.select { |file| file.status == :added }.count
23
+ end
24
+
25
+ # How many files were modified (not added/deleted) in this commit
26
+ def modified_files
27
+ file_stats.select { |file| file.status == :modified }.count
28
+ end
29
+
30
+ # How many total additions in this commit?
31
+ def additions
32
+ commit_summary(:additions)
33
+ end
34
+
35
+ # How many total deletions in this commit?
36
+ def deletions
37
+ commit_summary(:deletions)
38
+ end
39
+
40
+ # What is the net # of lines changes in this commit?
41
+ def net
42
+ commit_summary(:net)
43
+ end
44
+
45
+ def file_stats
46
+ @cached_file_stats ||= diffstats.map { |diff| DiffSummary.new(@repo, diff) }
47
+ end
48
+
49
+ LanguageSummary = Struct.new(:name, :additions, :deletions, :net, :added_files, :deleted_files, :modified_files)
50
+
51
+ # Array of LanguageSummary objects (one for each language) for simple calculations
52
+ def languages
53
+ grouped_language_files.collect do |language, stats|
54
+ additions = summarize(stats, :additions)
55
+ deletions = summarize(stats, :deletions)
56
+ net = summarize(stats, :net)
57
+ LanguageSummary.new(language, additions, deletions, net, added_files, deleted_files, modified_files)
58
+ end
59
+ end
60
+
61
+ # Group file statistics by language
62
+ def grouped_language_files
63
+ file_stats.group_by(&:language)
64
+ end
65
+
66
+ # Files touched in this commit
67
+ def filenames
68
+ file_stats.map(&:filename)
69
+ end
70
+
71
+ private
72
+
73
+ def summarize(stats, what)
74
+ stats.map(&what).inject(0, :+)
75
+ end
76
+
77
+ def commit_summary(what)
78
+ summarize(file_stats, what)
79
+ end
80
+
81
+ def diffstats
82
+ @patches
83
+ end
84
+
85
+ end
86
+ end