git_statistics 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -2,6 +2,8 @@ source 'https://rubygems.org'
2
2
 
3
3
  gem "json"
4
4
  gem "trollop"
5
+ gem "grit"
6
+ gem "github-linguist"
5
7
 
6
8
  group :test do
7
9
  gem "simplecov"
data/Gemfile.lock CHANGED
@@ -1,28 +1,50 @@
1
1
  GEM
2
2
  remote: https://rubygems.org/
3
3
  specs:
4
+ blankslate (2.1.2.4)
5
+ charlock_holmes (0.6.9)
4
6
  diff-lcs (1.1.3)
5
- json (1.6.6)
6
- multi_json (1.2.0)
7
+ escape_utils (0.2.4)
8
+ ffi (1.0.11)
9
+ github-linguist (2.3.3)
10
+ charlock_holmes (~> 0.6.6)
11
+ escape_utils (~> 0.2.3)
12
+ mime-types (~> 1.19)
13
+ pygments.rb (>= 0.2.13)
14
+ grit (2.5.0)
15
+ diff-lcs (~> 1.1)
16
+ mime-types (~> 1.15)
17
+ posix-spawn (~> 0.3.6)
18
+ json (1.7.5)
19
+ mime-types (1.19)
20
+ multi_json (1.3.6)
21
+ posix-spawn (0.3.6)
22
+ pygments.rb (0.2.13)
23
+ rubypython (~> 0.5.3)
7
24
  rake (0.9.2.2)
8
- rspec (2.9.0)
9
- rspec-core (~> 2.9.0)
10
- rspec-expectations (~> 2.9.0)
11
- rspec-mocks (~> 2.9.0)
12
- rspec-core (2.9.0)
13
- rspec-expectations (2.9.1)
25
+ rspec (2.11.0)
26
+ rspec-core (~> 2.11.0)
27
+ rspec-expectations (~> 2.11.0)
28
+ rspec-mocks (~> 2.11.0)
29
+ rspec-core (2.11.1)
30
+ rspec-expectations (2.11.3)
14
31
  diff-lcs (~> 1.1.3)
15
- rspec-mocks (2.9.0)
16
- simplecov (0.6.1)
32
+ rspec-mocks (2.11.2)
33
+ rubypython (0.5.3)
34
+ blankslate (>= 2.1.2.3)
35
+ ffi (~> 1.0.7)
36
+ simplecov (0.6.4)
17
37
  multi_json (~> 1.0)
18
38
  simplecov-html (~> 0.5.3)
19
39
  simplecov-html (0.5.3)
20
- trollop (1.16.2)
40
+ trollop (2.0)
21
41
 
22
42
  PLATFORMS
23
43
  ruby
24
44
 
25
45
  DEPENDENCIES
46
+ github-linguist
47
+ grit
26
48
  json
27
49
  rake
28
50
  rspec
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
1
  [![Build Status](https://secure.travis-ci.org/kevinjalbert/git_statistics.png?branch=master)](http://travis-ci.org/kevinjalbert/git_statistics)
2
+ [![Code Climate](https://codeclimate.com/badge.png)](https://codeclimate.com/github/kevinjalbert/git_statistics)
2
3
 
3
4
  # Instructions
4
5
 
@@ -12,19 +13,23 @@
12
13
  3. Run tests `bundle exec rake`
13
14
  4. Build and install local gem `bundle exec rake install`
14
15
 
15
- # Statistics
16
+ # Functions
16
17
 
17
- The following statistics are collected (organized by author name or author email):
18
+ This gem will analyze every commit within a git repository using `git log` and [mojombo/grit](https://github.com/mojombo/grit). The following author statistics in relation to the git repository are collected and displayed:
18
19
 
19
20
  * Total number of commits
20
21
  * Total number of merge commits
21
- * Total source line insertions
22
+ * Total source line additions
22
23
  * Total source line deletions
23
24
  * Total file creates
24
25
  * Total file deletes
25
26
  * Total file renames
26
27
  * Total file copies
27
28
 
29
+ This gem also uses [github/linguist](https://github.com/github/linguist) to determine the language of each individual file within commits. This augments the reported statistics by breaking down the author's statistics by languages.
30
+
31
+ This gem also has the ability to save the acquired data into a JSON file (in either a compressed or pretty format). If a saved file is present for the repository you can use the gem to load the data from the file, thus saving time for re-displaying the statistics using a different set of display flags (what statistic to sort on, number of authors to show, consider merges, etc...). In the event that a repository updates with new commits the gem allows you to update the saved file with the new commits.
32
+
28
33
  ## Contributing
29
34
 
30
35
  1. Fork it
@@ -17,4 +17,6 @@ Gem::Specification.new do |gem|
17
17
  gem.required_ruby_version = '>= 1.9.1'
18
18
  gem.add_dependency('json')
19
19
  gem.add_dependency('trollop')
20
+ gem.add_dependency('grit')
21
+ gem.add_dependency('github-linguist')
20
22
  end
@@ -0,0 +1,5 @@
1
+ module Grit
2
+ class Blob
3
+ include Linguist::BlobHelper
4
+ end
5
+ end
@@ -1,29 +1,47 @@
1
1
  module GitStatistics
2
2
  class Collector
3
3
 
4
- attr_accessor :commits
4
+ attr_accessor :commits, :verbose
5
5
 
6
- def initialize
6
+ def initialize(verbose)
7
7
  @commits = Commits.new
8
+ @verbose = verbose
9
+
10
+ # Connect to git repository if it exists
11
+ directory = Pathname.new(Dir.pwd)
12
+ while @repo == nil && !directory.root? do
13
+ begin
14
+ @repo = Grit::Repo.new(directory)
15
+ rescue
16
+ directory = directory.parent
17
+ end
18
+ end
19
+
20
+ # Abort if no git repository is found
21
+ if @repo == nil
22
+ raise ("No git Repository Found")
23
+ end
8
24
  end
9
25
 
10
26
  def collect(branch, since="")
11
-
12
27
  # Collect branches to use for git log
13
28
  branches = collect_branches
14
29
  branches = ["", ""] if branch
15
30
 
31
+ # Create pipe for the git log to acquire commits
16
32
  pipe = open("|git --no-pager log #{branches.join(' ')} --date=iso --reverse"\
17
- " --no-color --numstat --summary #{since}"\
18
- " --format=\"%H,%an,%ae,%ad,%p\"")
33
+ " --no-color --find-copies-harder --numstat --encoding=utf-8 "\
34
+ "--summary #{since} --format=\"%H,%an,%ae,%ad,%p\"")
19
35
 
36
+ # Use a buffer approach to queue up lines from the log for each commit
20
37
  buffer = []
21
38
  pipe.each do |line|
22
39
 
23
- line = line.force_encoding("ISO-8859-1").encode("UTF-8")
40
+ line = clean_string(line)
24
41
 
25
- if line.split(',').size == 5 # Matches the number of ',' in the format
26
- extract_buffer(buffer) if not buffer.empty?
42
+ # Extract the buffer (commit) when the line splits into 5 comma-separated fields, matching the log format (delimiter)
43
+ if line.split(',').size == 5
44
+ extract_commit(buffer) if not buffer.empty?
27
45
  buffer = []
28
46
  end
29
47
 
@@ -31,125 +49,359 @@ module GitStatistics
31
49
  end
32
50
 
33
51
  # Extract the last commit
34
- extract_buffer(buffer) if not buffer.empty?
52
+ extract_commit(buffer) if not buffer.empty?
35
53
  end
36
54
 
37
55
  def collect_branches
38
-
56
+ # Create pipe for git log to acquire branches
39
57
  pipe = open("|git --no-pager branch --no-color")
40
58
 
59
+ # Acquire all available branches from repository
41
60
  branches = []
42
61
  pipe.each do |line|
43
62
 
44
- # Remove the '* ' leading the current branch
63
+ # Remove the '*' leading the current branch
45
64
  line = line[1..-1] if line[0] == '*'
46
- branches << line.strip
65
+ branches << clean_string(line)
47
66
  end
48
67
 
49
68
  return branches
50
69
  end
51
70
 
52
- def extract_buffer(buffer)
53
-
71
+ def extract_commit(buffer)
72
+ # Acquire general commit information
54
73
  commit_info = buffer[0].split(',')
74
+ sha = commit_info[0]
55
75
 
56
- commit = (@commits[ commit_info[0] ] ||= Hash.new)
57
- commit[:author] = commit_info[1]
58
- commit[:author_email] = commit_info[2]
59
- commit[:time] = commit_info[3]
60
- commit[:insertions] = 0
61
- commit[:deletions] = 0
62
- commit[:creates] = 0
63
- commit[:deletes] = 0
64
- commit[:renames] = 0
65
- commit[:copies] = 0
76
+ # Initialize commit data
77
+ data = (@commits[sha] ||= Hash.new(0))
78
+ data[:author] = commit_info[1]
79
+ data[:author_email] = commit_info[2]
80
+ data[:time] = commit_info[3]
81
+ data[:files] = []
66
82
 
83
+ # Flag commit as merge if necessary (determined if two parents)
67
84
  if commit_info[4] == nil or commit_info[4].split(' ').size == 1
68
- commit[:merge] = false
85
+ data[:merge] = false
69
86
  else
70
- commit[:merge] = true
87
+ data[:merge] = true
88
+ end
89
+
90
+ puts "Extracting #{sha}" if @verbose
91
+
92
+ # Identify all changed files for this commit
93
+ files = identify_changed_files(buffer)
94
+
95
+ # Acquire blob for each changed file and process it
96
+ files.each do |file|
97
+ blob = get_blob(sha, file)
98
+
99
+ # Only process blobs, otherwise log problematic file/blob
100
+ if blob.instance_of?(Grit::Blob)
101
+ process_blob(data, blob, file)
102
+ else
103
+ puts "Problem processing file #{file[:file]}"
104
+ end
71
105
  end
106
+ end
107
+
108
+ def get_blob(sha, file)
109
+ # Split up file for Grit navigation
110
+ file = file[:file].split(File::Separator)
111
+
112
+ # Acquire blob of the file for this specific commit
113
+ blob = find_blob_in_tree(sha, @repo.tree(sha), file)
72
114
 
73
- # Only extract diff details if they exist
74
- if buffer.size > 1
115
+ # If we cannot find blob in current commit (deleted file), check previous commit
116
+ if blob == nil || blob.instance_of?(Grit::Tree)
117
+ prev_commit = @repo.commits(sha).first.parents[0]
118
+ return nil if prev_commit == nil
75
119
 
120
+ prev_tree = @repo.tree(prev_commit.id)
121
+ blob = find_blob_in_tree(prev_commit.id, prev_tree, file)
122
+ end
123
+ return blob
124
+ end
125
+
126
+ def identify_changed_files(buffer)
127
+ # If the buffer is larger than 2 lines then we have per-file details to process
128
+ changed_files = []
129
+ if buffer.size > 2
130
+
131
+ # For each modification extract the details
76
132
  buffer[2..-1].each do |line|
77
133
 
78
- next if extract_changes(commit, line)
79
- next if extract_create_delete_file(commit, line)
80
- next if extract_rename_copy_file(commit, line)
134
+ # Extract changed file information if it exists
135
+ data = extract_change_file(line)
136
+ if data != nil
137
+ changed_files << data
138
+ next # This line is processed, skip to next
139
+ end
140
+
141
+ # Extract details of create/delete files if it exists
142
+ data = extract_create_delete_file(line)
143
+ if data != nil
144
+ augmented = false
145
+ # Augment changed file with create/delete information if possible
146
+ changed_files.each do |file|
147
+ if file[:file] == data[:file]
148
+ file[:status] = data[:status]
149
+ augmented = true
150
+ break
151
+ end
152
+ end
153
+ changed_files << data if !augmented
154
+ next # This line is processed, skip to next
155
+ end
156
+
157
+ # Extract details of rename/copy files if it exists
158
+ data = extract_rename_copy_file(line)
159
+ if data != nil
160
+ augmented = false
161
+ # Augment changed file with rename/copy information if possible
162
+ changed_files.each do |file|
163
+ if file[:file] == data[:new_file]
164
+ file[:status] = data[:status]
165
+ file[:old_file] = data[:old_file]
166
+ file[:similar] = data[:similar]
167
+ augmented = true
168
+ break
169
+ end
170
+ end
171
+ changed_files << data if !augmented
172
+ next # This line is processed, skip to next
173
+ end
174
+ end
175
+ end
176
+ return changed_files
177
+ end
178
+
179
+ def find_blob_in_tree(sha, tree, file)
180
+ # Check if the tree cannot be found in the commit or if we found a submodule as the changed file
181
+ if tree == nil
182
+ return nil
183
+ elsif tree.instance_of?(Grit::Submodule)
184
+ return tree
185
+ end
186
+
187
+ # If the blob is within the current directory (tree)
188
+ if file.size == 1
189
+ blob = tree / file.first
190
+
191
+ # Check if blob is nil (could not find changed file in tree)
192
+ if blob == nil
193
+
194
+ # Try looking for submodules as they cannot be found using tree / file notation
195
+ tree.contents.each do |content|
196
+ if file.first == content.name
197
+ return nil
198
+ end
199
+ end
200
+
201
+ # Exit through recursion with the base case of a nil tree/blob
202
+ return find_blob_in_tree(sha, blob, file)
203
+ end
204
+ return blob
205
+ else
206
+ # Explore deeper in the tree to find the blob of the changed file
207
+ return find_blob_in_tree(sha, tree / file.first, file[1..-1])
208
+ end
209
+ end
81
210
 
211
+ def process_blob(data, blob, file)
212
+ # Initialize a hash to hold information regarding the file
213
+ file_hash = Hash.new(0)
214
+ file_hash[:name] = file[:file]
215
+ file_hash[:additions] = file[:additions]
216
+ file_hash[:deletions] = file[:deletions]
217
+ file_hash[:status] = file[:status]
218
+
219
+ # Add file information to commit itself
220
+ data[file[:status].to_sym] += 1 if file[:status] != nil
221
+ data[:additions] += file[:additions]
222
+ data[:deletions] += file[:deletions]
223
+
224
+ # Handle submodule if present, otherwise acquire specifics on blob
225
+ if blob.instance_of?(Grit::Submodule)
226
+ file_hash[:language] = "Submodule"
227
+ else
228
+ file_hash[:binary] = blob.binary?
229
+ file_hash[:image] = blob.image?
230
+ file_hash[:vendored] = blob.vendored?
231
+ file_hash[:generated] = blob.generated?
232
+
233
+ # Identify the language of the blob if possible
234
+ if blob.language == nil
235
+ file_hash[:language] = "Unknown"
236
+ else
237
+ file_hash[:language] = blob.language.name
82
238
  end
83
239
  end
240
+ data[:files] << file_hash
241
+ end
242
+
243
+ def clean_string(file_name)
244
+ #if file_name.include?("foo")
245
+ #blob = @repo.tree("1ec5c2674fd792e8f9ddbff5afcacc3e1f7c506d") / "actionpack" / "test" / "fixtures" / "public" / "foo"
246
+ #ap "=-=-=-=-=-=-="
247
+ #ap file_name
248
+ #ap "--------------------"
249
+ #ap blob.contents[2].name
250
+ #ap "=-=-=-=-=-=-="
251
+ #end
252
+ # Clean up a string and force utf-8 encoding
253
+ return file_name.strip.gsub('"', '').gsub("\\\\", "\\").force_encoding("utf-8")
84
254
  end
85
255
 
86
- def extract_changes(commit, line)
87
- changes = line.scan( /(\d+)\s(\d+)\s(.*)/ )[0]
256
+ def extract_change_file(line)
257
+ # Use regex to detect a rename/copy changed file | 1 2 /path/{test => new}/file.txt
258
+ changes = line.scan(/^([-|\d]+)\s+([-|\d]+)\s+(.+)\s+=>\s+(.+)/)[0]
259
+ if changes != nil and changes.size == 4
260
+ # Split up the file into the old and new file
261
+ split_file = split_old_new_file(changes[2], changes[3])
262
+ return {:additions => changes[0].to_i,
263
+ :deletions => changes[1].to_i,
264
+ :file => clean_string(split_file[:new_file]),
265
+ :old_file => clean_string(split_file[:old_file])}
266
+ end
88
267
 
268
+ # Use regex to detect a changed file | 1 2 /path/test/file.txt
269
+ changes = line.scan(/^([-|\d]+)\s+([-|\d]+)\s+(.+)/)[0]
89
270
  if changes != nil and changes.size == 3
90
- commit[:insertions] += changes[0].to_i
91
- commit[:deletions] += changes[1].to_i
92
- return true
271
+ return {:additions => changes[0].to_i,
272
+ :deletions => changes[1].to_i,
273
+ :file => clean_string(changes[2])}
93
274
  end
275
+ return nil
94
276
  end
95
277
 
96
- def extract_create_delete_file(commit, line)
97
- changes = line.scan(/(create|delete) mode \d+ ([^\\\n]*)/)[0]
98
-
278
+ def extract_create_delete_file(line)
279
+ # Use regex to detect a create/delete file | create mode 100644 /path/test/file.txt
280
+ changes = line.scan(/^(create|delete) mode \d+ ([^\\\n]*)/)[0]
99
281
  if changes != nil and changes.size == 2
100
- commit[:creates] += 1 if changes[0] == "create"
101
- commit[:deletes] += 1 if changes[0] == "delete"
102
- return true
282
+ return {:status => clean_string(changes[0]),
283
+ :file => clean_string(changes[1])}
103
284
  end
285
+ return nil
104
286
  end
105
287
 
106
- def extract_rename_copy_file(commit, line)
107
- changes = line.scan(/(rename|copy)([^(]*)/)[0]
288
+ def extract_rename_copy_file(line)
289
+ # Use regex to detect a rename/copy file | copy /path/{test => new}/file.txt
290
+ changes = line.scan(/^(rename|copy)\s+(.+)\s+=>\s+(.+)\s+\((\d+)/)[0]
291
+ if changes != nil and changes.size == 4
292
+ # Split up the file into the old and new file
293
+ split_file = split_old_new_file(changes[1], changes[2])
294
+ return {:status => clean_string(changes[0]),
295
+ :old_file => clean_string(split_file[:old_file]),
296
+ :new_file => clean_string(split_file[:new_file]),
297
+ :similar => changes[3].to_i}
298
+ end
299
+ return nil
300
+ end
108
301
 
109
- if changes != nil and changes.size == 2
110
- commit[:renames] += 1 if changes[0] == "rename"
111
- commit[:copies] += 1 if changes[0] == "copy"
302
+ def split_old_new_file(old, new)
303
+ # Split the old and new chunks up (separated by the =>)
304
+ split_old = old.split('{')
305
+ split_new = new.split('}')
306
+
307
+ # Recombine the file splits into their whole paths
308
+ if split_old.size == 1 && split_new.size == 1
309
+ old_file = split_old[0]
310
+ new_file = split_new[0]
311
+ elsif split_new.size == 1
312
+ old_file = split_old[0] + split_old[1] + split_new[0]
313
+ new_file = split_old[0] + split_new[0]
314
+ elsif split_old.size == 1
315
+ old_file = split_old[0] + split_new[1]
316
+ new_file = split_old[0] + split_new[0] + split_new[1]
317
+ else
318
+ old_file = split_old[0] + split_old[1] + split_new[1]
319
+ new_file = split_old[0] + split_new[0] + split_new[1]
112
320
  end
113
- return true
321
+
322
+ # Return files, yet remove the '//' if present from combining splits
323
+ return {:old_file => old_file.gsub('//', '/'),
324
+ :new_file => new_file.gsub('//', '/')}
114
325
  end
115
326
 
116
327
  def print_summary(sort_type, email, n=0)
328
+ # Default to a 0 if given a negative number to display
117
329
  n = 0 if n < 0
118
330
 
119
- data = @commits.author_top_n_type(email, sort_type, n)
120
-
331
+ # Acquire data based on sort type and top # to show
332
+ data = @commits.author_top_n_type(sort_type, n)
121
333
  if data == nil
122
- puts "ERROR: Parameter for --sort is not valid"
123
- return
334
+ raise "Parameter for --sort is not valid"
124
335
  end
125
336
 
126
- # Find the longest name/email (used for string formatting)
127
- total_authors = @commits.author_list.length
128
- author_length = 17
337
+ # Acquire formatting pattern for output
338
+ author_length = find_longest_author(data)
339
+ language_length = find_longest_language(data)
340
+ pattern = "%-#{author_length}s | %-#{language_length}s | %7s | %9s | %9s | %7s | %7s | %7s | %6s | %6s |"
341
+
342
+ # Print query/header information
343
+ print_header(pattern, sort_type, n, author_length, language_length)
344
+
345
+ # Print per author information
129
346
  data.each do |key,value|
130
- author_length = key.length if key.length > author_length
347
+ puts pattern % [key, "", value[:commits], value[:additions],
348
+ value[:deletions], value[:create], value[:delete],
349
+ value[:rename], value[:copy], value[:merges]]
350
+ print_language_data(pattern, value)
351
+ end
352
+
353
+ # Reprint query/header for repository information
354
+ print_header(pattern, sort_type, n, author_length, language_length)
355
+ data = @commits.totals
356
+ puts pattern % ["Repository Totals", "", data[:commits],
357
+ data[:additions], data[:deletions], data[:create],
358
+ data[:delete], data[:rename], data[:copy], data[:merges]]
359
+ print_language_data(pattern, data)
360
+ end
361
+
362
+ def print_language_data(pattern, data)
363
+ # Print information of each language for the data
364
+ data[:languages].each do |key,value|
365
+ puts pattern % ["", key, "", value[:additions], value[:deletions],
366
+ value[:create], value[:delete], value[:rename],
367
+ value[:copy], value[:merges]]
131
368
  end
369
+ end
132
370
 
133
- # Print header information
371
+ def print_header(pattern, sort_type, n, author_length, language_length)
372
+ total_authors = @commits.author_list.length
373
+
374
+ # Print summary information of displayed results
134
375
  if n > 0 and n < total_authors
135
- puts "Top #{n} authors(#{total_authors}) sorted by #{sort_type.to_s}\n\n"
376
+ puts "\nTop #{n} authors(#{total_authors}) sorted by #{sort_type.to_s}\n"
136
377
  else
137
- puts "All authors(#{total_authors}) sorted by #{sort_type.to_s}\n\n"
378
+ puts "\nAll authors(#{total_authors}) sorted by #{sort_type.to_s}\n"
138
379
  end
139
380
 
140
- pattern = "%-#{author_length}s|%7s|%10s|%9s|%7s|%7s|%7s|%6s|%6s|"
141
- puts pattern % ['Name/email', 'commits', 'insertions', 'deletions', 'creates', 'deletes', 'renames', 'copies', 'merges']
142
- puts "-"*68 + "-"*author_length
381
+ # Print column headers
382
+ puts "-"*87 + "-"*author_length + "-"*language_length
383
+ puts pattern % ['Name/Email', 'Language', 'Commits', 'Additions', 'Deletions', 'Creates', 'Deletes', 'Renames', 'Copies', 'Merges']
384
+ puts "-"*87 + "-"*author_length + "-"*language_length
385
+ end
143
386
 
387
+ def find_longest_author(data)
388
+ # Find the longest author name/email (for string formatting)
389
+ total_authors = @commits.author_list.length
390
+ author_length = 17
144
391
  data.each do |key,value|
145
- puts pattern % [key, value[:commits], value[:insertions], value[:deletions],
146
- value[:creates], value[:deletes], value[:renames], value[:copies], value[:merges]]
392
+ author_length = key.length if key.length > author_length
147
393
  end
394
+ return author_length
395
+ end
148
396
 
149
- puts "-"*68 + "-"*author_length
150
- puts pattern % ["Repository Totals", @commits.totals[:commits],
151
- @commits.totals[:insertions], @commits.totals[:deletions], @commits.totals[:creates],
152
- @commits.totals[:deletes], @commits.totals[:renames], @commits.totals[:copies], @commits.totals[:merges]]
397
+ def find_longest_language(data)
398
+ # Find the longest language name (for string formatting)
399
+ total_language = @commits.language_list.length
400
+ language_length = 9
401
+ @commits.language_list.each do |key,value|
402
+ language_length = key.length if key.length > language_length
403
+ end
404
+ return language_length
153
405
  end
154
406
  end
155
407
  end