git-health-check 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1 +1 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
@@ -4,20 +4,20 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'git-health-check/version'
5
5
 
6
6
  Gem::Specification.new do |gem|
7
- gem.name = "git-health-check"
7
+ gem.name = 'git-health-check'
8
8
  gem.version = GitHealthCheck::VERSION
9
- gem.authors = ["Ben Snape"]
10
- gem.email = ["bsnape@gmail.com"]
9
+ gem.authors = ['Ben Snape']
10
+ gem.email = ['bsnape@gmail.com']
11
11
  gem.description = %q{Git Health Check}
12
12
  gem.summary = %q{Git Health Check}
13
- gem.homepage = "http://wwww.bensnape.com"
13
+ gem.homepage = 'http://wwww.bensnape.com'
14
14
 
15
15
  gem.files = `git ls-files`.split($/)
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
- gem.require_paths = ["lib"]
18
+ gem.require_paths = ['lib']
19
19
 
20
- gem.add_dependency("ruport")
21
- gem.add_development_dependency("rake")
20
+ gem.add_dependency('ruport')
21
+ gem.add_development_dependency('rake')
22
22
 
23
23
  end
@@ -1,4 +1,4 @@
1
- require "git-health-check/version"
1
+ require 'git-health-check/version'
2
2
  require 'git-health-check/history'
3
3
  require 'git-health-check/working_copy'
4
4
  require 'git-health-check/report'
@@ -12,18 +12,18 @@ module GitHealthCheck
12
12
  end
13
13
 
14
14
  def execute(view)
15
- history = GitHealthCheck::History.new(@repository, 'HEAD', @threshold)
15
+ history = GitHealthCheck::History.new(@repository, @threshold)
16
16
  working_copy = GitHealthCheck::WorkingCopy.new @repository
17
17
  packfile = GitHealthCheck::Packfile.new(@repository)
18
18
  packfile.packfile_stats
19
19
 
20
- working_copy_output = working_copy.fast_find_in_working_copy
20
+ working_copy_output = working_copy.find_in_working_copy
21
21
  history_output = history.search
22
22
 
23
- working_copy_report = Table("object sha", "size (kB)", "compressed (kB)", "path")
24
- history_report = Table("object sha", "size (MB)", "path", "commit details", "author")
23
+ working_copy_report = Table('object sha', 'size (MB)', 'path')
24
+ history_report = Table('object sha', 'size (MB)', 'path', 'commit details', 'author')
25
25
 
26
- working_copy_output.each { |sha, (size, csize, path)| working_copy_report << [sha, size, csize, path] }
26
+ working_copy_output.each { |sha, size, path| working_copy_report << [sha, size, path] }
27
27
  history_output.each { |sha, size, path, where, who| history_report << [sha, size, path, where, who] }
28
28
 
29
29
  report = GitHealthCheck::Report.new(working_copy_report.to_html, history_report.to_html, packfile)
@@ -29,13 +29,13 @@ EOB
29
29
  def set_parser_options
30
30
  @parser.banner = banner
31
31
 
32
- @parser.separator "Options:"
32
+ @parser.separator 'Options:'
33
33
 
34
- @parser.on("-v", "--version", "Displays the gem version") { @command_class = VersionCommand }
34
+ @parser.on('-v', '--version', 'Displays the gem version') { @command_class = VersionCommand }
35
35
 
36
- @parser.on("-h", "--help", "Displays this help message") { @command_class = HelpCommand }
36
+ @parser.on('-h', '--help', 'Displays this help message') { @command_class = HelpCommand }
37
37
 
38
- @parser.on("-t", "--threshold THRESHOLD", Float, "Specify history size threshold in MB (default 0.5)") do |n|
38
+ @parser.on('-t', '--threshold THRESHOLD', Float, 'Specify history size threshold in MB (default 0.5)') do |n|
39
39
  @threshold = n
40
40
  end
41
41
  end
@@ -38,5 +38,21 @@ module GitHealthCheck
38
38
  `git count-objects -v`
39
39
  end
40
40
 
41
+ def get_largest_files(number=10)
42
+ `git ls-files -z | xargs -0 ls -l | sort -nrk5 | head -n #{number}`
43
+ end
44
+
45
+ def get_object_sha_from_path(path)
46
+ `git ls-files -s #{path} | cut -d ' ' -f 2`
47
+ end
48
+
49
+ def get_revision_list(head='HEAD')
50
+ `git rev-list #{head}`
51
+ end
52
+
53
+ def get_treeish_contents(treeish)
54
+ `git ls-tree -zrl #{treeish}`
55
+ end
56
+
41
57
  end
42
58
  end
@@ -5,36 +5,36 @@ module GitHealthCheck
5
5
 
6
6
  class History
7
7
 
8
- MEGABYTE = 1000 ** 2
8
+ MEGABYTE = 1024 ** 2
9
9
 
10
- def initialize(repository, head = 'HEAD', threshold = 0.1)
11
- @head = head
10
+ def initialize(repository, threshold)
11
+ @repository = repository
12
12
  @bytes_threshold = threshold.to_f * MEGABYTE
13
13
  Dir.chdir repository
14
14
  @git_lib = GitHealthCheck::GitLib.new repository
15
15
  end
16
16
 
17
17
  def search
18
-
18
+ revision_list = @git_lib.get_revision_list.split "\n"
19
19
  big_files = {}
20
20
 
21
- # list commit objects in chronological order
22
- IO.popen("git rev-list #@head", 'r') do |rev_list|
23
- rev_list.each_line do |commit|
24
- # list contents of the tree object
25
- `git ls-tree -zrl #{commit.chomp!}`.split("\0").each do |object|
26
- bits, type, sha, size, path = object.split(/\s+/, 5)
27
- size = size.to_i
28
- big_files[sha] = [path, size, commit] if size >= @bytes_threshold
29
- end
21
+ revision_list.each do |commit|
22
+ @git_lib.get_treeish_contents(commit).split("\0").each do |object|
23
+ bits, type, sha, size, path = object.split
24
+ next if File.exist?("#@repository/#{path}")
25
+ size = size.to_f
26
+ big_files[path] = [sha, size, commit] if size >= @bytes_threshold
30
27
  end
31
28
  end
32
29
 
33
- big_files.map do |sha, (path, size, commit_sha)|
30
+ big_files = big_files.map do |path, (sha, size, commit_sha)|
34
31
  where = @git_lib.get_commit_details commit_sha
35
32
  who = @git_lib.get_commit_author commit_sha
36
- [sha, size.to_f / MEGABYTE, path, where, who]
33
+ [sha, (size / MEGABYTE).round(2), path, where, who]
37
34
  end
35
+
36
+ big_files.sort_by! { |a| [a[1], a[2]] }.reverse!
37
+
38
38
  end
39
39
 
40
40
  end
@@ -8,7 +8,7 @@ module GitHealthCheck
8
8
  @working_copy = working_copy
9
9
  @history = history
10
10
  @packfile = packfile
11
- @report_directory = Dir.pwd + "/healthcheck"
11
+ @report_directory = Dir.pwd + '/healthcheck'
12
12
  @repository = Dir.pwd
13
13
  end
14
14
 
@@ -17,7 +17,7 @@ module GitHealthCheck
17
17
  end
18
18
 
19
19
  def get_template
20
- File.read(File.dirname(__FILE__) + "/report/report.erb")
20
+ File.read(File.dirname(__FILE__) + '/report/report.erb')
21
21
  end
22
22
 
23
23
  def create
@@ -26,7 +26,7 @@ module GitHealthCheck
26
26
 
27
27
  Dir.mkdir @report_directory unless File.directory? @report_directory
28
28
 
29
- File.open(@report_directory + "/report.html", "w+") do |f|
29
+ File.open(@report_directory + '/report.html', 'w+') do |f|
30
30
  f.write output
31
31
  end
32
32
  end
@@ -39,14 +39,15 @@
39
39
 
40
40
  h3 {
41
41
  font-size: 16px;
42
+ padding: 0 0 0 20px;
42
43
  }
43
44
 
44
45
  #history {
45
- padding: 0 0 0 20px;
46
+ padding: 0 0 0 30px;
46
47
  }
47
48
 
48
49
  #working-copy {
49
- padding: 0 0 0 20px;
50
+ padding: 0 0 0 30px;
50
51
  }
51
52
  </style>
52
53
  </head>
@@ -64,11 +65,11 @@
64
65
  <li><strong>Total packfile size: </strong>
65
66
  <% size = @packfile.size_of_pack[0].to_f %>
66
67
  <% if size >= 1048576 %>
67
- <%= size / 1048576 %> GB
68
+ <%= (size / 1048576).round 2 %> GB
68
69
  <% elsif size >= 1024 %>
69
- <%= size / 1024 %> MB
70
+ <%= (size / 1024).round 2 %> MB
70
71
  <% else %>
71
- <%= size %> kB
72
+ <%= size.round 2 %> kB
72
73
  <% end %>
73
74
  </li>
74
75
  </ul>
@@ -78,24 +79,41 @@
78
79
 
79
80
  <h2>Repository Size</h2>
80
81
 
82
+ <h3>Working Copy</h3>
83
+
81
84
  <div id="working-copy">
82
85
 
83
- <h3>Working Copy</h3>
86
+ <h4>Description</h4>
87
+
88
+ <p>This metric inspects your repository's working copy using
89
+ <a href="http://www.kernel.org/pub/software/scm/git/docs/git-ls-files.html">git ls-files</a> to identify the
90
+ <em>n</em> (default 10) largest files - and consequently the <strong>largest objects</strong>.</p>
84
91
 
85
- <p>This metric inspects your repository's <a href="http://git-scm.com/book/en/Git-Internals-Packfiles">packfile</a>
86
- contents via its index to identify the <em>n</em> (default 10) largest objects - and consequently the
87
- <strong>largest files</strong> - in your working copy.</p> <br>
92
+ <p>In some cases the same object hash will be displayed for multiple paths. <strong>This is not a bug.</strong>
93
+ The way git stores <a href="http://git-scm.com/book/en/Git-Internals-Git-Objects">blob objects</a> - i.e. bytes on
94
+ the filesystem that could be anything (e.g. a text file, source code, an image) - means that if you have
95
+ <em>exactly</em> the same file in multiple places in your repository - perhaps a static test file - then the
96
+ report will correctly output <em>all the paths on the filesystem where the object is referenced</em>.</p>
97
+
98
+ <h4>Statistics</h4>
88
99
 
89
100
  <p><%= @working_copy %></p>
90
101
 
91
102
  </div>
92
103
 
104
+ <br>
105
+
106
+ <h3>History</h3>
107
+
93
108
  <div id="history">
94
- <h3>History</h3>
109
+
110
+ <h4>Description</h4>
95
111
 
96
112
  <p>This metric thoroughly inspects your repository's history and identifies the largest files over a configurable
97
- threshold (defaults at 0.5 MB) that have been committed in the past <strong>but are
98
- no longer part of the working copy</strong>.</p> <br>
113
+ threshold (defaults at 0.5 MB) that have been committed in the past <strong>but are no longer part of the working
114
+ copy</strong>.</p>
115
+
116
+ <h4>Statistics</h4>
99
117
 
100
118
  <p><%= @history %></p>
101
119
  </div>
@@ -1,3 +1,3 @@
1
1
  module GitHealthCheck
2
- VERSION = "0.0.2"
2
+ VERSION = '0.0.3'
3
3
  end
@@ -7,34 +7,23 @@ module GitHealthCheck
7
7
  @git_lib = GitHealthCheck::GitLib.new repository
8
8
  end
9
9
 
10
- def fast_find_in_working_copy(number=10)
11
- raise 'no packs found' if @git_lib.get_pack_number == 0
12
- objects = `git verify-pack -v .git/objects/pack/pack-*.idx | grep -E 'tree|blob' | sort -k4nr`
13
-
14
- objects = objects.split("\n")
15
-
16
- test = {}
17
-
18
- objects.each do |object|
19
- break if test.size == number
20
- sha = object.match(/^\w+/)[0]
21
- #path = `git rev-list --all --objects | grep #{sha} | cut -f 2 -d ' '`.chomp
22
- path = `git rev-list --all --objects | grep #{sha}`.match(/\w+\s(.*)/)[1]
23
- # an empty path means you need to do a garbage collection
24
- next unless File.exist?("#@repository/#{path}")
25
-
26
- sha, type, size, size_in_pack, offset = object.split
27
-
28
- # convert from byte to kilobyte
29
- size = (size.to_f / 1024).round 2
30
- size_in_pack = (size_in_pack.to_f / 1024).round 2
31
-
32
- test[sha] = [size, size_in_pack, path]
10
+ def find_in_working_copy(number=10)
11
+ largest_files = @git_lib.get_largest_files number
12
+ largest_files = largest_files.split "\n"
13
+
14
+ files = []
15
+
16
+ largest_files.each do |file|
17
+ split = file.split
18
+ size = split[4]
19
+ path = split[8]
20
+ sha = @git_lib.get_object_sha_from_path path
21
+ size = (size.to_f * 9.53674e-7).round 2 # bytes to MB
22
+ files << [sha, size, path] # no hash in case of blob-reuse in various paths
33
23
  end
34
24
 
35
- test
25
+ files
36
26
  end
37
27
 
38
-
39
28
  end
40
29
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: git-health-check
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-15 00:00:00.000000000 Z
12
+ date: 2013-02-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ruport