git_statistics 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -2,6 +2,8 @@ source 'https://rubygems.org'
2
2
 
3
3
  gem "json"
4
4
  gem "trollop"
5
+ gem "grit"
6
+ gem "github-linguist"
5
7
 
6
8
  group :test do
7
9
  gem "simplecov"
data/Gemfile.lock CHANGED
@@ -1,28 +1,50 @@
1
1
  GEM
2
2
  remote: https://rubygems.org/
3
3
  specs:
4
+ blankslate (2.1.2.4)
5
+ charlock_holmes (0.6.9)
4
6
  diff-lcs (1.1.3)
5
- json (1.6.6)
6
- multi_json (1.2.0)
7
+ escape_utils (0.2.4)
8
+ ffi (1.0.11)
9
+ github-linguist (2.3.3)
10
+ charlock_holmes (~> 0.6.6)
11
+ escape_utils (~> 0.2.3)
12
+ mime-types (~> 1.19)
13
+ pygments.rb (>= 0.2.13)
14
+ grit (2.5.0)
15
+ diff-lcs (~> 1.1)
16
+ mime-types (~> 1.15)
17
+ posix-spawn (~> 0.3.6)
18
+ json (1.7.5)
19
+ mime-types (1.19)
20
+ multi_json (1.3.6)
21
+ posix-spawn (0.3.6)
22
+ pygments.rb (0.2.13)
23
+ rubypython (~> 0.5.3)
7
24
  rake (0.9.2.2)
8
- rspec (2.9.0)
9
- rspec-core (~> 2.9.0)
10
- rspec-expectations (~> 2.9.0)
11
- rspec-mocks (~> 2.9.0)
12
- rspec-core (2.9.0)
13
- rspec-expectations (2.9.1)
25
+ rspec (2.11.0)
26
+ rspec-core (~> 2.11.0)
27
+ rspec-expectations (~> 2.11.0)
28
+ rspec-mocks (~> 2.11.0)
29
+ rspec-core (2.11.1)
30
+ rspec-expectations (2.11.3)
14
31
  diff-lcs (~> 1.1.3)
15
- rspec-mocks (2.9.0)
16
- simplecov (0.6.1)
32
+ rspec-mocks (2.11.2)
33
+ rubypython (0.5.3)
34
+ blankslate (>= 2.1.2.3)
35
+ ffi (~> 1.0.7)
36
+ simplecov (0.6.4)
17
37
  multi_json (~> 1.0)
18
38
  simplecov-html (~> 0.5.3)
19
39
  simplecov-html (0.5.3)
20
- trollop (1.16.2)
40
+ trollop (2.0)
21
41
 
22
42
  PLATFORMS
23
43
  ruby
24
44
 
25
45
  DEPENDENCIES
46
+ github-linguist
47
+ grit
26
48
  json
27
49
  rake
28
50
  rspec
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
1
  [![Build Status](https://secure.travis-ci.org/kevinjalbert/git_statistics.png?branch=master)](http://travis-ci.org/kevinjalbert/git_statistics)
2
+ [![Code Climate](https://codeclimate.com/badge.png)](https://codeclimate.com/github/kevinjalbert/git_statistics)
2
3
 
3
4
  # Instructions
4
5
 
@@ -12,19 +13,23 @@
12
13
  3. Run tests `bundle exec rake`
13
14
  4. Build and install local gem `bundle exec rake install`
14
15
 
15
- # Statistics
16
+ # Functions
16
17
 
17
- The following statistics are collected (organized by author name or author email):
18
+ This gem will analyze every commit within a git repository using `git log` and [mojombo/grit](https://github.com/mojombo/grit). The following author statistics in relation to the git repository are collected and displayed:
18
19
 
19
20
  * Total number of commits
20
21
  * Total number of merge commits
21
- * Total source line insertions
22
+ * Total source line additions
22
23
  * Total source line deletions
23
24
  * Total file creates
24
25
  * Total file deletes
25
26
  * Total file renames
26
27
  * Total file copies
27
28
 
29
+ This gem also uses [github/linguist](https://github.com/github/linguist) to determine the language of each individual file within commits. This augments the reported statistics by breaking down the author's statistics by language.
30
+
31
+ This gem also has the ability to save the acquired data into a JSON file (in either a compressed or pretty format). If a saved file is present for the repository, you can use the gem to load the data from the file, thus saving time when re-displaying the statistics with a different set of display flags (which statistic to sort on, number of authors to show, whether to consider merges, etc.). In the event that a repository is updated with new commits, the gem allows you to update the saved file with the new commits.
32
+
28
33
  ## Contributing
29
34
 
30
35
  1. Fork it
@@ -17,4 +17,6 @@ Gem::Specification.new do |gem|
17
17
  gem.required_ruby_version = '>= 1.9.1'
18
18
  gem.add_dependency('json')
19
19
  gem.add_dependency('trollop')
20
+ gem.add_dependency('grit')
21
+ gem.add_dependency('github-linguist')
20
22
  end
@@ -0,0 +1,5 @@
1
+ module Grit
2
+ class Blob
3
+ include Linguist::BlobHelper
4
+ end
5
+ end
@@ -1,29 +1,47 @@
1
1
  module GitStatistics
2
2
  class Collector
3
3
 
4
- attr_accessor :commits
4
+ attr_accessor :commits, :verbose
5
5
 
6
- def initialize
6
+ def initialize(verbose)
7
7
  @commits = Commits.new
8
+ @verbose = verbose
9
+
10
+ # Connect to git repository if it exists
11
+ directory = Pathname.new(Dir.pwd)
12
+ while @repo == nil && !directory.root? do
13
+ begin
14
+ @repo = Grit::Repo.new(directory)
15
+ rescue
16
+ directory = directory.parent
17
+ end
18
+ end
19
+
20
+ # Abort if no git repository is found
21
+ if @repo == nil
22
+ raise ("No git Repository Found")
23
+ end
8
24
  end
9
25
 
10
26
  def collect(branch, since="")
11
-
12
27
  # Collect branches to use for git log
13
28
  branches = collect_branches
14
29
  branches = ["", ""] if branch
15
30
 
31
+ # Create pipe for the git log to acquire commits
16
32
  pipe = open("|git --no-pager log #{branches.join(' ')} --date=iso --reverse"\
17
- " --no-color --numstat --summary #{since}"\
18
- " --format=\"%H,%an,%ae,%ad,%p\"")
33
+ " --no-color --find-copies-harder --numstat --encoding=utf-8 "\
34
+ "--summary #{since} --format=\"%H,%an,%ae,%ad,%p\"")
19
35
 
36
+ # Use a buffer approach to queue up lines from the log for each commit
20
37
  buffer = []
21
38
  pipe.each do |line|
22
39
 
23
- line = line.force_encoding("ISO-8859-1").encode("UTF-8")
40
+ line = clean_string(line)
24
41
 
25
- if line.split(',').size == 5 # Matches the number of ',' in the format
26
- extract_buffer(buffer) if not buffer.empty?
42
+ # Extract the buffer (commit) when we match ','x5 in the log format (delimiter)
43
+ if line.split(',').size == 5
44
+ extract_commit(buffer) if not buffer.empty?
27
45
  buffer = []
28
46
  end
29
47
 
@@ -31,125 +49,359 @@ module GitStatistics
31
49
  end
32
50
 
33
51
  # Extract the last commit
34
- extract_buffer(buffer) if not buffer.empty?
52
+ extract_commit(buffer) if not buffer.empty?
35
53
  end
36
54
 
37
55
  def collect_branches
38
-
56
+ # Create pipe for git branch to acquire branches
39
57
  pipe = open("|git --no-pager branch --no-color")
40
58
 
59
+ # Acquire all available branches from repository
41
60
  branches = []
42
61
  pipe.each do |line|
43
62
 
44
- # Remove the '* ' leading the current branch
63
+ # Remove the '*' leading the current branch
45
64
  line = line[1..-1] if line[0] == '*'
46
- branches << line.strip
65
+ branches << clean_string(line)
47
66
  end
48
67
 
49
68
  return branches
50
69
  end
51
70
 
52
- def extract_buffer(buffer)
53
-
71
+ def extract_commit(buffer)
72
+ # Acquire general commit information
54
73
  commit_info = buffer[0].split(',')
74
+ sha = commit_info[0]
55
75
 
56
- commit = (@commits[ commit_info[0] ] ||= Hash.new)
57
- commit[:author] = commit_info[1]
58
- commit[:author_email] = commit_info[2]
59
- commit[:time] = commit_info[3]
60
- commit[:insertions] = 0
61
- commit[:deletions] = 0
62
- commit[:creates] = 0
63
- commit[:deletes] = 0
64
- commit[:renames] = 0
65
- commit[:copies] = 0
76
+ # Initialize commit data
77
+ data = (@commits[sha] ||= Hash.new(0))
78
+ data[:author] = commit_info[1]
79
+ data[:author_email] = commit_info[2]
80
+ data[:time] = commit_info[3]
81
+ data[:files] = []
66
82
 
83
+ # Flag commit as merge if necessary (determined if two parents)
67
84
  if commit_info[4] == nil or commit_info[4].split(' ').size == 1
68
- commit[:merge] = false
85
+ data[:merge] = false
69
86
  else
70
- commit[:merge] = true
87
+ data[:merge] = true
88
+ end
89
+
90
+ puts "Extracting #{sha}" if @verbose
91
+
92
+ # Identify all changed files for this commit
93
+ files = identify_changed_files(buffer)
94
+
95
+ # Acquire blob for each changed file and process it
96
+ files.each do |file|
97
+ blob = get_blob(sha, file)
98
+
99
+ # Only process blobs, otherwise log problematic file/blob
100
+ if blob.instance_of?(Grit::Blob)
101
+ process_blob(data, blob, file)
102
+ else
103
+ puts "Problem processing file #{file[:file]}"
104
+ end
71
105
  end
106
+ end
107
+
108
+ def get_blob(sha, file)
109
+ # Split up file for Grit navigation
110
+ file = file[:file].split(File::Separator)
111
+
112
+ # Acquire blob of the file for this specific commit
113
+ blob = find_blob_in_tree(sha, @repo.tree(sha), file)
72
114
 
73
- # Only extract diff details if they exist
74
- if buffer.size > 1
115
+ # If we cannot find blob in current commit (deleted file), check previous commit
116
+ if blob == nil || blob.instance_of?(Grit::Tree)
117
+ prev_commit = @repo.commits(sha).first.parents[0]
118
+ return nil if prev_commit == nil
75
119
 
120
+ prev_tree = @repo.tree(prev_commit.id)
121
+ blob = find_blob_in_tree(prev_commit.id, prev_tree, file)
122
+ end
123
+ return blob
124
+ end
125
+
126
+ def identify_changed_files(buffer)
127
+ # If the buffer is larger than 2 lines then we have per-file details to process
128
+ changed_files = []
129
+ if buffer.size > 2
130
+
131
+ # For each modification extract the details
76
132
  buffer[2..-1].each do |line|
77
133
 
78
- next if extract_changes(commit, line)
79
- next if extract_create_delete_file(commit, line)
80
- next if extract_rename_copy_file(commit, line)
134
+ # Extract changed file information if it exists
135
+ data = extract_change_file(line)
136
+ if data != nil
137
+ changed_files << data
138
+ next # This line is processed, skip to next
139
+ end
140
+
141
+ # Extract details of create/delete files if it exists
142
+ data = extract_create_delete_file(line)
143
+ if data != nil
144
+ augmented = false
145
+ # Augment changed file with create/delete information if possible
146
+ changed_files.each do |file|
147
+ if file[:file] == data[:file]
148
+ file[:status] = data[:status]
149
+ augmented = true
150
+ break
151
+ end
152
+ end
153
+ changed_files << data if !augmented
154
+ next # This line is processed, skip to next
155
+ end
156
+
157
+ # Extract details of rename/copy files if it exists
158
+ data = extract_rename_copy_file(line)
159
+ if data != nil
160
+ augmented = false
161
+ # Augment changed file with rename/copy information if possible
162
+ changed_files.each do |file|
163
+ if file[:file] == data[:new_file]
164
+ file[:status] = data[:status]
165
+ file[:old_file] = data[:old_file]
166
+ file[:similar] = data[:similar]
167
+ augmented = true
168
+ break
169
+ end
170
+ end
171
+ changed_files << data if !augmented
172
+ next # This line is processed, skip to next
173
+ end
174
+ end
175
+ end
176
+ return changed_files
177
+ end
178
+
179
+ def find_blob_in_tree(sha, tree, file)
180
+ # Check if we cannot find the tree in the commit, or if we found a submodule as the changed file
181
+ if tree == nil
182
+ return nil
183
+ elsif tree.instance_of?(Grit::Submodule)
184
+ return tree
185
+ end
186
+
187
+ # If the blob is within the current directory (tree)
188
+ if file.size == 1
189
+ blob = tree / file.first
190
+
191
+ # Check if blob is nil (could not find changed file in tree)
192
+ if blob == nil
193
+
194
+ # Try looking for submodules as they cannot be found using tree / file notation
195
+ tree.contents.each do |content|
196
+ if file.first == content.name
197
+ return nil
198
+ end
199
+ end
200
+
201
+ # Exit through recursion with the base case of a nil tree/blob
202
+ return find_blob_in_tree(sha, blob, file)
203
+ end
204
+ return blob
205
+ else
206
+ # Explore deeper in the tree to find the blob of the changed file
207
+ return find_blob_in_tree(sha, tree / file.first, file[1..-1])
208
+ end
209
+ end
81
210
 
211
+ def process_blob(data, blob, file)
212
+ # Initialize a hash to hold information regarding the file
213
+ file_hash = Hash.new(0)
214
+ file_hash[:name] = file[:file]
215
+ file_hash[:additions] = file[:additions]
216
+ file_hash[:deletions] = file[:deletions]
217
+ file_hash[:status] = file[:status]
218
+
219
+ # Add file information to commit itself
220
+ data[file[:status].to_sym] += 1 if file[:status] != nil
221
+ data[:additions] += file[:additions]
222
+ data[:deletions] += file[:deletions]
223
+
224
+ # Handle submodule if present, otherwise acquire specifics on blob
225
+ if blob.instance_of?(Grit::Submodule)
226
+ file_hash[:language] = "Submodule"
227
+ else
228
+ file_hash[:binary] = blob.binary?
229
+ file_hash[:image] = blob.image?
230
+ file_hash[:vendored] = blob.vendored?
231
+ file_hash[:generated] = blob.generated?
232
+
233
+ # Identify the language of the blob if possible
234
+ if blob.language == nil
235
+ file_hash[:language] = "Unknown"
236
+ else
237
+ file_hash[:language] = blob.language.name
82
238
  end
83
239
  end
240
+ data[:files] << file_hash
241
+ end
242
+
243
+ def clean_string(file_name)
244
+ #if file_name.include?("foo")
245
+ #blob = @repo.tree("1ec5c2674fd792e8f9ddbff5afcacc3e1f7c506d") / "actionpack" / "test" / "fixtures" / "public" / "foo"
246
+ #ap "=-=-=-=-=-=-="
247
+ #ap file_name
248
+ #ap "--------------------"
249
+ #ap blob.contents[2].name
250
+ #ap "=-=-=-=-=-=-="
251
+ #end
252
+ # Clean up a string and force utf-8 encoding
253
+ return file_name.strip.gsub('"', '').gsub("\\\\", "\\").force_encoding("utf-8")
84
254
  end
85
255
 
86
- def extract_changes(commit, line)
87
- changes = line.scan( /(\d+)\s(\d+)\s(.*)/ )[0]
256
+ def extract_change_file(line)
257
+ # Use regex to detect a rename/copy changed file | 1 2 /path/{test => new}/file.txt
258
+ changes = line.scan(/^([-|\d]+)\s+([-|\d]+)\s+(.+)\s+=>\s+(.+)/)[0]
259
+ if changes != nil and changes.size == 4
260
+ # Split up the file into the old and new file
261
+ split_file = split_old_new_file(changes[2], changes[3])
262
+ return {:additions => changes[0].to_i,
263
+ :deletions => changes[1].to_i,
264
+ :file => clean_string(split_file[:new_file]),
265
+ :old_file => clean_string(split_file[:old_file])}
266
+ end
88
267
 
268
+ # Use regex to detect a changed file | 1 2 /path/test/file.txt
269
+ changes = line.scan(/^([-|\d]+)\s+([-|\d]+)\s+(.+)/)[0]
89
270
  if changes != nil and changes.size == 3
90
- commit[:insertions] += changes[0].to_i
91
- commit[:deletions] += changes[1].to_i
92
- return true
271
+ return {:additions => changes[0].to_i,
272
+ :deletions => changes[1].to_i,
273
+ :file => clean_string(changes[2])}
93
274
  end
275
+ return nil
94
276
  end
95
277
 
96
- def extract_create_delete_file(commit, line)
97
- changes = line.scan(/(create|delete) mode \d+ ([^\\\n]*)/)[0]
98
-
278
+ def extract_create_delete_file(line)
279
+ # Use regex to detect a create/delete file | create mode 100644 /path/test/file.txt
280
+ changes = line.scan(/^(create|delete) mode \d+ ([^\\\n]*)/)[0]
99
281
  if changes != nil and changes.size == 2
100
- commit[:creates] += 1 if changes[0] == "create"
101
- commit[:deletes] += 1 if changes[0] == "delete"
102
- return true
282
+ return {:status => clean_string(changes[0]),
283
+ :file => clean_string(changes[1])}
103
284
  end
285
+ return nil
104
286
  end
105
287
 
106
- def extract_rename_copy_file(commit, line)
107
- changes = line.scan(/(rename|copy)([^(]*)/)[0]
288
+ def extract_rename_copy_file(line)
289
+ # Use regex to detect a rename/copy file | copy /path/{test => new}/file.txt
290
+ changes = line.scan(/^(rename|copy)\s+(.+)\s+=>\s+(.+)\s+\((\d+)/)[0]
291
+ if changes != nil and changes.size == 4
292
+ # Split up the file into the old and new file
293
+ split_file = split_old_new_file(changes[1], changes[2])
294
+ return {:status => clean_string(changes[0]),
295
+ :old_file => clean_string(split_file[:old_file]),
296
+ :new_file => clean_string(split_file[:new_file]),
297
+ :similar => changes[3].to_i}
298
+ end
299
+ return nil
300
+ end
108
301
 
109
- if changes != nil and changes.size == 2
110
- commit[:renames] += 1 if changes[0] == "rename"
111
- commit[:copies] += 1 if changes[0] == "copy"
302
+ def split_old_new_file(old, new)
303
+ # Split the old and new chunks up (separated by the =>)
304
+ split_old = old.split('{')
305
+ split_new = new.split('}')
306
+
307
+ # Recombine the file splits into their whole paths
308
+ if split_old.size == 1 && split_new.size == 1
309
+ old_file = split_old[0]
310
+ new_file = split_new[0]
311
+ elsif split_new.size == 1
312
+ old_file = split_old[0] + split_old[1] + split_new[0]
313
+ new_file = split_old[0] + split_new[0]
314
+ elsif split_old.size == 1
315
+ old_file = split_old[0] + split_new[1]
316
+ new_file = split_old[0] + split_new[0] + split_new[1]
317
+ else
318
+ old_file = split_old[0] + split_old[1] + split_new[1]
319
+ new_file = split_old[0] + split_new[0] + split_new[1]
112
320
  end
113
- return true
321
+
322
+ # Return files, yet remove the '//' if present from combining splits
323
+ return {:old_file => old_file.gsub('//', '/'),
324
+ :new_file => new_file.gsub('//', '/')}
114
325
  end
115
326
 
116
327
  def print_summary(sort_type, email, n=0)
328
+ # Default to a 0 if given a negative number to display
117
329
  n = 0 if n < 0
118
330
 
119
- data = @commits.author_top_n_type(email, sort_type, n)
120
-
331
+ # Acquire data based on sort type and top # to show
332
+ data = @commits.author_top_n_type(sort_type, n)
121
333
  if data == nil
122
- puts "ERROR: Parameter for --sort is not valid"
123
- return
334
+ raise "Parameter for --sort is not valid"
124
335
  end
125
336
 
126
- # Find the longest name/email (used for string formatting)
127
- total_authors = @commits.author_list.length
128
- author_length = 17
337
+ # Acquire formatting pattern for output
338
+ author_length = find_longest_author(data)
339
+ language_length = find_longest_language(data)
340
+ pattern = "%-#{author_length}s | %-#{language_length}s | %7s | %9s | %9s | %7s | %7s | %7s | %6s | %6s |"
341
+
342
+ # Print query/header information
343
+ print_header(pattern, sort_type, n, author_length, language_length)
344
+
345
+ # Print per author information
129
346
  data.each do |key,value|
130
- author_length = key.length if key.length > author_length
347
+ puts pattern % [key, "", value[:commits], value[:additions],
348
+ value[:deletions], value[:create], value[:delete],
349
+ value[:rename], value[:copy], value[:merges]]
350
+ print_language_data(pattern, value)
351
+ end
352
+
353
+ # Reprint query/header for repository information
354
+ print_header(pattern, sort_type, n, author_length, language_length)
355
+ data = @commits.totals
356
+ puts pattern % ["Repository Totals", "", data[:commits],
357
+ data[:additions], data[:deletions], data[:create],
358
+ data[:delete], data[:rename], data[:copy], data[:merges]]
359
+ print_language_data(pattern, data)
360
+ end
361
+
362
+ def print_language_data(pattern, data)
363
+ # Print information of each language for the data
364
+ data[:languages].each do |key,value|
365
+ puts pattern % ["", key, "", value[:additions], value[:deletions],
366
+ value[:create], value[:delete], value[:rename],
367
+ value[:copy], value[:merges]]
131
368
  end
369
+ end
132
370
 
133
- # Print header information
371
+ def print_header(pattern, sort_type, n, author_length, language_length)
372
+ total_authors = @commits.author_list.length
373
+
374
+ # Print summary information of displayed results
134
375
  if n > 0 and n < total_authors
135
- puts "Top #{n} authors(#{total_authors}) sorted by #{sort_type.to_s}\n\n"
376
+ puts "\nTop #{n} authors(#{total_authors}) sorted by #{sort_type.to_s}\n"
136
377
  else
137
- puts "All authors(#{total_authors}) sorted by #{sort_type.to_s}\n\n"
378
+ puts "\nAll authors(#{total_authors}) sorted by #{sort_type.to_s}\n"
138
379
  end
139
380
 
140
- pattern = "%-#{author_length}s|%7s|%10s|%9s|%7s|%7s|%7s|%6s|%6s|"
141
- puts pattern % ['Name/email', 'commits', 'insertions', 'deletions', 'creates', 'deletes', 'renames', 'copies', 'merges']
142
- puts "-"*68 + "-"*author_length
381
+ # Print column headers
382
+ puts "-"*87 + "-"*author_length + "-"*language_length
383
+ puts pattern % ['Name/Email', 'Language', 'Commits', 'Additions', 'Deletions', 'Creates', 'Deletes', 'Renames', 'Copies', 'Merges']
384
+ puts "-"*87 + "-"*author_length + "-"*language_length
385
+ end
143
386
 
387
+ def find_longest_author(data)
388
+ # Find the longest author name/email (for string formatting)
389
+ total_authors = @commits.author_list.length
390
+ author_length = 17
144
391
  data.each do |key,value|
145
- puts pattern % [key, value[:commits], value[:insertions], value[:deletions],
146
- value[:creates], value[:deletes], value[:renames], value[:copies], value[:merges]]
392
+ author_length = key.length if key.length > author_length
147
393
  end
394
+ return author_length
395
+ end
148
396
 
149
- puts "-"*68 + "-"*author_length
150
- puts pattern % ["Repository Totals", @commits.totals[:commits],
151
- @commits.totals[:insertions], @commits.totals[:deletions], @commits.totals[:creates],
152
- @commits.totals[:deletes], @commits.totals[:renames], @commits.totals[:copies], @commits.totals[:merges]]
397
+ def find_longest_language(data)
398
+ # Find the longest language name (for string formatting)
399
+ total_language = @commits.language_list.length
400
+ language_length = 9
401
+ @commits.language_list.each do |key,value|
402
+ language_length = key.length if key.length > language_length
403
+ end
404
+ return language_length
153
405
  end
154
406
  end
155
407
  end