git-health-check 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -1 +1 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
@@ -4,20 +4,20 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'git-health-check/version'
5
5
 
6
6
  Gem::Specification.new do |gem|
7
- gem.name = "git-health-check"
7
+ gem.name = 'git-health-check'
8
8
  gem.version = GitHealthCheck::VERSION
9
- gem.authors = ["Ben Snape"]
10
- gem.email = ["bsnape@gmail.com"]
9
+ gem.authors = ['Ben Snape']
10
+ gem.email = ['bsnape@gmail.com']
11
11
  gem.description = %q{Git Health Check}
12
12
  gem.summary = %q{Git Health Check}
13
- gem.homepage = "http://wwww.bensnape.com"
13
+ gem.homepage = 'http://wwww.bensnape.com'
14
14
 
15
15
  gem.files = `git ls-files`.split($/)
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
- gem.require_paths = ["lib"]
18
+ gem.require_paths = ['lib']
19
19
 
20
- gem.add_dependency("ruport")
21
- gem.add_development_dependency("rake")
20
+ gem.add_dependency('ruport')
21
+ gem.add_development_dependency('rake')
22
22
 
23
23
  end
@@ -1,4 +1,4 @@
1
- require "git-health-check/version"
1
+ require 'git-health-check/version'
2
2
  require 'git-health-check/history'
3
3
  require 'git-health-check/working_copy'
4
4
  require 'git-health-check/report'
@@ -12,18 +12,18 @@ module GitHealthCheck
12
12
  end
13
13
 
14
14
  def execute(view)
15
- history = GitHealthCheck::History.new(@repository, 'HEAD', @threshold)
15
+ history = GitHealthCheck::History.new(@repository, @threshold)
16
16
  working_copy = GitHealthCheck::WorkingCopy.new @repository
17
17
  packfile = GitHealthCheck::Packfile.new(@repository)
18
18
  packfile.packfile_stats
19
19
 
20
- working_copy_output = working_copy.fast_find_in_working_copy
20
+ working_copy_output = working_copy.find_in_working_copy
21
21
  history_output = history.search
22
22
 
23
- working_copy_report = Table("object sha", "size (kB)", "compressed (kB)", "path")
24
- history_report = Table("object sha", "size (MB)", "path", "commit details", "author")
23
+ working_copy_report = Table('object sha', 'size (MB)', 'path')
24
+ history_report = Table('object sha', 'size (MB)', 'path', 'commit details', 'author')
25
25
 
26
- working_copy_output.each { |sha, (size, csize, path)| working_copy_report << [sha, size, csize, path] }
26
+ working_copy_output.each { |sha, size, path| working_copy_report << [sha, size, path] }
27
27
  history_output.each { |sha, size, path, where, who| history_report << [sha, size, path, where, who] }
28
28
 
29
29
  report = GitHealthCheck::Report.new(working_copy_report.to_html, history_report.to_html, packfile)
@@ -29,13 +29,13 @@ EOB
29
29
  def set_parser_options
30
30
  @parser.banner = banner
31
31
 
32
- @parser.separator "Options:"
32
+ @parser.separator 'Options:'
33
33
 
34
- @parser.on("-v", "--version", "Displays the gem version") { @command_class = VersionCommand }
34
+ @parser.on('-v', '--version', 'Displays the gem version') { @command_class = VersionCommand }
35
35
 
36
- @parser.on("-h", "--help", "Displays this help message") { @command_class = HelpCommand }
36
+ @parser.on('-h', '--help', 'Displays this help message') { @command_class = HelpCommand }
37
37
 
38
- @parser.on("-t", "--threshold THRESHOLD", Float, "Specify history size threshold in MB (default 0.5)") do |n|
38
+ @parser.on('-t', '--threshold THRESHOLD', Float, 'Specify history size threshold in MB (default 0.5)') do |n|
39
39
  @threshold = n
40
40
  end
41
41
  end
@@ -38,5 +38,21 @@ module GitHealthCheck
38
38
  `git count-objects -v`
39
39
  end
40
40
 
41
+ def get_largest_files(number=10)
42
+ `git ls-files -z | xargs -0 ls -l | sort -nrk5 | head -n #{number}`
43
+ end
44
+
45
+ def get_object_sha_from_path(path)
46
+ `git ls-files -s #{path} | cut -d ' ' -f 2`
47
+ end
48
+
49
+ def get_revision_list(head='HEAD')
50
+ `git rev-list #{head}`
51
+ end
52
+
53
+ def get_treeish_contents(treeish)
54
+ `git ls-tree -zrl #{treeish}`
55
+ end
56
+
41
57
  end
42
58
  end
@@ -5,36 +5,36 @@ module GitHealthCheck
5
5
 
6
6
  class History
7
7
 
8
- MEGABYTE = 1000 ** 2
8
+ MEGABYTE = 1024 ** 2
9
9
 
10
- def initialize(repository, head = 'HEAD', threshold = 0.1)
11
- @head = head
10
+ def initialize(repository, threshold)
11
+ @repository = repository
12
12
  @bytes_threshold = threshold.to_f * MEGABYTE
13
13
  Dir.chdir repository
14
14
  @git_lib = GitHealthCheck::GitLib.new repository
15
15
  end
16
16
 
17
17
  def search
18
-
18
+ revision_list = @git_lib.get_revision_list.split "\n"
19
19
  big_files = {}
20
20
 
21
- # list commit objects in chronological order
22
- IO.popen("git rev-list #@head", 'r') do |rev_list|
23
- rev_list.each_line do |commit|
24
- # list contents of the tree object
25
- `git ls-tree -zrl #{commit.chomp!}`.split("\0").each do |object|
26
- bits, type, sha, size, path = object.split(/\s+/, 5)
27
- size = size.to_i
28
- big_files[sha] = [path, size, commit] if size >= @bytes_threshold
29
- end
21
+ revision_list.each do |commit|
22
+ @git_lib.get_treeish_contents(commit).split("\0").each do |object|
23
+ bits, type, sha, size, path = object.split
24
+ next if File.exist?("#@repository/#{path}")
25
+ size = size.to_f
26
+ big_files[path] = [sha, size, commit] if size >= @bytes_threshold
30
27
  end
31
28
  end
32
29
 
33
- big_files.map do |sha, (path, size, commit_sha)|
30
+ big_files = big_files.map do |path, (sha, size, commit_sha)|
34
31
  where = @git_lib.get_commit_details commit_sha
35
32
  who = @git_lib.get_commit_author commit_sha
36
- [sha, size.to_f / MEGABYTE, path, where, who]
33
+ [sha, (size / MEGABYTE).round(2), path, where, who]
37
34
  end
35
+
36
+ big_files.sort_by! { |a| [a[1], a[2]] }.reverse!
37
+
38
38
  end
39
39
 
40
40
  end
@@ -8,7 +8,7 @@ module GitHealthCheck
8
8
  @working_copy = working_copy
9
9
  @history = history
10
10
  @packfile = packfile
11
- @report_directory = Dir.pwd + "/healthcheck"
11
+ @report_directory = Dir.pwd + '/healthcheck'
12
12
  @repository = Dir.pwd
13
13
  end
14
14
 
@@ -17,7 +17,7 @@ module GitHealthCheck
17
17
  end
18
18
 
19
19
  def get_template
20
- File.read(File.dirname(__FILE__) + "/report/report.erb")
20
+ File.read(File.dirname(__FILE__) + '/report/report.erb')
21
21
  end
22
22
 
23
23
  def create
@@ -26,7 +26,7 @@ module GitHealthCheck
26
26
 
27
27
  Dir.mkdir @report_directory unless File.directory? @report_directory
28
28
 
29
- File.open(@report_directory + "/report.html", "w+") do |f|
29
+ File.open(@report_directory + '/report.html', 'w+') do |f|
30
30
  f.write output
31
31
  end
32
32
  end
@@ -39,14 +39,15 @@
39
39
 
40
40
  h3 {
41
41
  font-size: 16px;
42
+ padding: 0 0 0 20px;
42
43
  }
43
44
 
44
45
  #history {
45
- padding: 0 0 0 20px;
46
+ padding: 0 0 0 30px;
46
47
  }
47
48
 
48
49
  #working-copy {
49
- padding: 0 0 0 20px;
50
+ padding: 0 0 0 30px;
50
51
  }
51
52
  </style>
52
53
  </head>
@@ -64,11 +65,11 @@
64
65
  <li><strong>Total packfile size: </strong>
65
66
  <% size = @packfile.size_of_pack[0].to_f %>
66
67
  <% if size >= 1048576 %>
67
- <%= size / 1048576 %> GB
68
+ <%= (size / 1048576).round 2 %> GB
68
69
  <% elsif size >= 1024 %>
69
- <%= size / 1024 %> MB
70
+ <%= (size / 1024).round 2 %> MB
70
71
  <% else %>
71
- <%= size %> kB
72
+ <%= size.round 2 %> kB
72
73
  <% end %>
73
74
  </li>
74
75
  </ul>
@@ -78,24 +79,41 @@
78
79
 
79
80
  <h2>Repository Size</h2>
80
81
 
82
+ <h3>Working Copy</h3>
83
+
81
84
  <div id="working-copy">
82
85
 
83
- <h3>Working Copy</h3>
86
+ <h4>Description</h4>
87
+
88
+ <p>This metric inspects your repository's working copy using
89
+ <a href="http://www.kernel.org/pub/software/scm/git/docs/git-ls-files.html">git ls-files</a> to identify the
90
+ <em>n</em> (default 10) largest files - and consequently the <strong>largest objects</strong>.</p>
84
91
 
85
- <p>This metric inspects your repository's <a href="http://git-scm.com/book/en/Git-Internals-Packfiles">packfile</a>
86
- contents via its index to identify the <em>n</em> (default 10) largest objects - and consequently the
87
- <strong>largest files</strong> - in your working copy.</p> <br>
92
+ <p>In some cases the same object hash will be displayed for multiple paths. <strong>This is not a bug.</strong>
93
+ The way git stores <a href="http://git-scm.com/book/en/Git-Internals-Git-Objects">blob objects</a> - i.e. bytes on
94
+ the filesystem that could be anything (e.g. a text file, source code, an image) - means that if you have
95
+ <em>exactly</em> the same file in multiple places in your repository - perhaps a static test file - then the
96
+ report will correctly output <em>all the paths on the filesystem where the object is referenced</em>.</p>
97
+
98
+ <h4>Statistics</h4>
88
99
 
89
100
  <p><%= @working_copy %></p>
90
101
 
91
102
  </div>
92
103
 
104
+ <br>
105
+
106
+ <h3>History</h3>
107
+
93
108
  <div id="history">
94
- <h3>History</h3>
109
+
110
+ <h4>Description</h4>
95
111
 
96
112
  <p>This metric thoroughly inspects your repository's history and identifies the largest files over a configurable
97
- threshold (defaults at 0.5 MB) that have been committed in the past <strong>but are
98
- no longer part of the working copy</strong>.</p> <br>
113
+ threshold (defaults at 0.5 MB) that have been committed in the past <strong>but are no longer part of the working
114
+ copy</strong>.</p>
115
+
116
+ <h4>Statistics</h4>
99
117
 
100
118
  <p><%= @history %></p>
101
119
  </div>
@@ -1,3 +1,3 @@
1
1
  module GitHealthCheck
2
- VERSION = "0.0.2"
2
+ VERSION = '0.0.3'
3
3
  end
@@ -7,34 +7,23 @@ module GitHealthCheck
7
7
  @git_lib = GitHealthCheck::GitLib.new repository
8
8
  end
9
9
 
10
- def fast_find_in_working_copy(number=10)
11
- raise 'no packs found' if @git_lib.get_pack_number == 0
12
- objects = `git verify-pack -v .git/objects/pack/pack-*.idx | grep -E 'tree|blob' | sort -k4nr`
13
-
14
- objects = objects.split("\n")
15
-
16
- test = {}
17
-
18
- objects.each do |object|
19
- break if test.size == number
20
- sha = object.match(/^\w+/)[0]
21
- #path = `git rev-list --all --objects | grep #{sha} | cut -f 2 -d ' '`.chomp
22
- path = `git rev-list --all --objects | grep #{sha}`.match(/\w+\s(.*)/)[1]
23
- # an empty path means you need to do a garbage collection
24
- next unless File.exist?("#@repository/#{path}")
25
-
26
- sha, type, size, size_in_pack, offset = object.split
27
-
28
- # convert from byte to kilobyte
29
- size = (size.to_f / 1024).round 2
30
- size_in_pack = (size_in_pack.to_f / 1024).round 2
31
-
32
- test[sha] = [size, size_in_pack, path]
10
+ def find_in_working_copy(number=10)
11
+ largest_files = @git_lib.get_largest_files number
12
+ largest_files = largest_files.split "\n"
13
+
14
+ files = []
15
+
16
+ largest_files.each do |file|
17
+ split = file.split
18
+ size = split[4]
19
+ path = split[8]
20
+ sha = @git_lib.get_object_sha_from_path path
21
+ size = (size.to_f * 9.53674e-7).round 2 # bytes to MB
22
+ files << [sha, size, path] # no hash in case of blob-reuse in various paths
33
23
  end
34
24
 
35
- test
25
+ files
36
26
  end
37
27
 
38
-
39
28
  end
40
29
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: git-health-check
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-15 00:00:00.000000000 Z
12
+ date: 2013-02-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ruport