storage_visualizer 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/storage_visualizer.rb +359 -0
- metadata +50 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 194d65e776b179be3a357f52d97c0b30f5e396d5
|
4
|
+
data.tar.gz: 9299adc1d69eba26b5c9ef859f9606b5be3c7384
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 50c06dded42e0726d18e996700c1fb03f3441491a77bff6dcfb660fac4d283d5373cac51836fcaf85f6a4fdd33ea387ed6a21726ca1f6510602b203eb534a13d
|
7
|
+
data.tar.gz: 2042e22397e7be0eb2fa8b49836c0a5a24b1c4cffbbd830e8fc676ebf5f5136749a4983badb20126dfe168a10b8aa75382aaca973ffed7426e2fd4bf8fe5b250
|
@@ -0,0 +1,359 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require 'yaml'
|
5
|
+
require 'date'
|
6
|
+
|
7
|
+
# Builds an HTML storage report (a Google Sankey diagram) showing which
# directories under a target directory occupy more than a configurable share
# of the root filesystem. Sizes are gathered with the `df` and `du` utilities,
# so this works on macOS and Linux but not on Windows.
#
# Typical use:
#   sv = StorageVisualizer.new('~/')
#   sv.run
class StorageVisualizer
  # Bytes per gigabyte (binary), used when formatting sizes for display.
  BYTES_PER_GB = 1024.0**3

  # Multipliers for the unit suffix in a df header such as "1K-blocks".
  DF_BLOCK_UNITS = { '' => 1, 'K' => 1024, 'M' => 1024**2, 'G' => 1024**3 }.freeze

  # Mount-point column of the POSIX df layout; used only when the header row
  # has no "Mounted" column to locate it by.
  DEFAULT_MOUNT_INDEX = 5

  # Characters that must be escaped before a directory name can be placed in
  # a single-quoted JavaScript string inside an inline <script> element.
  JS_ESCAPES = {
    '\\' => '\\\\',
    "'" => "\\'",
    "\n" => '\\n',
    "\r" => '\\r',
    '<' => '\\x3C'
  }.freeze

  # Prints the command-line and API usage text to STDOUT.
  def self.print_usage
    puts "\nThis tool helps visualize which directories are occupying the most storage. Any directory that occupies more than 5% of disk space is added to a visual hierarchical storage report in the form of a Google Sankey diagram. The storage data is gathered using the linux `du` utility. It has been tested on Mac OSX, should work on linux systems, will not work on Windows. Run as sudo if analyzing otherwise inaccessible directories. May take a while to run\n"
    puts "\nCommand line usage: \n\t[sudo] ./storage_visualizer.rb [directory to visualize (default ~/) | -h (help) | -i | --install (install to /usr/local/bin)]\n\n"
    puts "API usage: "
    puts "\trequire 'storage_visualizer'"
    puts "\tsv = StorageVisualizer.new('[directory to visualize, ~/ by default]')"
    puts "\tsv.run()\n\n"
    puts "A report will be created in the current directory named as such: StorageReport_2015_05_25-17_19_30.html"
    puts "Status messages are printed to STDOUT"
    puts "\n\n"
  end

  # To do:
  # - Allow the threshold to be specified on the command line (default is 5%)
  # - Allow output filename to be specified
  # - Enable for filesystems not mounted at the root '/'
  # - Prevent paths on the graph from crossing
  # - See if it would be cleaner to use the googlecharts gem (gem install googlecharts)
  # - Create an installer that sets up cron scheduling and add polling to the webpage
  # - What to do about directories with the same name under different parents

  # Root filesystem figures, in bytes
  attr_accessor :capacity
  attr_accessor :used
  attr_accessor :available
  # Root filesystem figures, in whole GB, as strings for display
  attr_accessor :capacity_gb
  attr_accessor :used_gb
  attr_accessor :available_gb
  # Absolute path of the directory being analyzed
  attr_accessor :target_dir
  # Sankey rows: arrays of [from_label, to_label, weight_in_gb]
  attr_accessor :tree
  # The tree rendered as a JavaScript array literal
  attr_accessor :tree_formatted
  # Per-mount-point hash of 'capacity' / 'used' / 'available' byte counts
  attr_accessor :diskhash
  # Fraction of disk capacity a directory must exceed to be charted
  attr_accessor :threshold_pct

  # target_dir_passed - directory to analyze; defaults to the user's home
  #                     directory when nil.
  #
  # Raises RuntimeError if the given directory does not exist.
  def initialize(target_dir_passed = nil)
    if target_dir_passed.nil?
      # no target passed, use the user's home dir
      self.target_dir = File.expand_path('~')
    else
      expanded = File.expand_path(target_dir_passed)
      puts "Target dir: #{expanded}"
      raise "Target directory does not exist: #{expanded}" unless Dir.exist?(expanded)

      self.target_dir = expanded
    end

    self.threshold_pct = 0.05
    self.diskhash = {}
    self.tree = []
    self.tree_formatted = ''
  end

  # Renders the tree as a JavaScript array-of-arrays literal, stores it in
  # tree_formatted and returns it. Labels are escaped so that directory names
  # containing quotes, backslashes or "</script>" cannot break the report.
  def format_data_for_the_chart
    rows = tree.map do |from, to, weight|
      "[ '#{js_escape(from)}', '#{js_escape(to)}', #{weight} ]"
    end

    formatted = "[\n"
    # every row but the last is followed by a comma
    formatted << rows.join(",\n") << "\n" unless rows.empty?
    formatted << "]\n"

    self.tree_formatted = formatted
  end

  # Writes the HTML report into the current directory as
  # StorageReport_<timestamp>.html and returns the file name.
  def write_storage_report
    # The chart library is loaded through the gstatic loader; the older
    # jsapi "autoload" URL is the legacy way of loading Google Charts.
    the_html = <<-HTML
<html>
<head>
<meta charset="utf-8">
<title>Storage Report</title>
</head>
<body>
<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>

<style>
td
{
  font-family:sans-serif;
  font-size:8pt;
}

.bigger
{
  font-family:sans-serif;
  font-size:10pt;
  font-weight:bold
}

</style>

<div class="table">
  <div class="bigger">Storage Report</div>
  <table>
    <tr>
      <td style="text-align:right">Disk Capacity:</td><td>#{capacity_gb} GB</td>
    </tr>
    <tr>
      <td style="text-align:right">Disk Used:</td><td>#{used_gb} GB</td>
    </tr>
    <tr>
      <td style="text-align:right">Free Space:</td><td>#{available_gb} GB</td>
    </tr>
  </table>

</div>


<div id="sankey_multiple" style="width: 900px; height: 300px;"></div>

<script type="text/javascript">

google.charts.load('current', {'packages': ['sankey']});
google.charts.setOnLoadCallback(drawChart);
function drawChart() {
  var data = new google.visualization.DataTable();
  data.addColumn('string', 'From');
  data.addColumn('string', 'To');
  data.addColumn('number', 'Weight');
  data.addRows( #{tree_formatted});

  // Set chart options
  var options = {

    width: 1000,
    sankey: {
      iterations: 32,
      node: { label: { fontName: 'Arial',
                       fontSize: 10,
                       color: '#871b47',
                       bold: false,
                       italic: true } } },
  };

  // Instantiate and draw our chart, passing in some options.
  var chart = new google.visualization.Sankey(document.getElementById('sankey_multiple'));
  chart.draw(data, options);
}

</script>
</body>
</html>
    HTML

    filename = Time.now.strftime("./StorageReport_%Y_%m_%d-%H_%M_%S.html")
    puts "Writing html file #{filename}"
    # block form closes the file even if the write raises
    File.open(filename, 'w') { |f| f.write(the_html) }
    filename
  end

  # Reads capacity / used / available for every locally mounted filesystem
  # into diskhash, copies the figures for '/' into the byte and GB accessors
  # and adds the "Free Space" row to the tree.
  #
  # df_output - optional pre-captured `df` output (header row included).
  #             When nil, `df -lk` is run. The block size is read from the
  #             header row, defaulting to 1024 bytes.
  #
  # Raises RuntimeError if no filesystem mounted at '/' is listed.
  def get_basic_disk_info(df_output = nil)
    # -l: locally mounted filesystems only; -k: report 1024-byte blocks
    df_output ||= `df -lk`
    diskhash.merge!(parse_df_output(df_output))

    root = diskhash['/']
    raise "Could not find the root filesystem ('/') in the df output" if root.nil?

    self.capacity = root['capacity']
    self.used = root['used']
    self.available = root['available']

    self.capacity_gb = format_gb(capacity)
    self.used_gb = format_gb(used)
    self.available_gb = format_gb(available)

    tree.push(['/', 'Free Space', available_gb])
  end

  # Measures dir_to_analyze with `du`; if it occupies more than threshold_pct
  # of the root filesystem's capacity, adds it to the tree and recurses into
  # its non-hidden child directories. For '/' itself only the children are
  # analyzed. Requires capacity (and used, for the target directory) to have
  # been set, normally by get_basic_disk_info.
  def analyze_dirs(dir_to_analyze)
    # bootstrap case: '/' is the chart's root node, so only its children are measured
    if dir_to_analyze == '/'
      child_dirs(dir_to_analyze).each { |path| analyze_dirs(path) }
      return
    end

    size = directory_size_bytes(dir_to_analyze)
    size_gb = format_gb(size)
    occupancy = size.to_f / capacity.to_f

    # directories at or below the threshold are neither charted nor descended into
    return unless occupancy > threshold_pct

    occupancy_pct = format('%.0f', occupancy * 100)
    puts "Dir contains more than #{threshold_label} of disk space: #{dir_to_analyze} \n\tsize:\t#{size_gb} / \ncapacity:\t#{format_gb(capacity)} = #{occupancy_pct}%"

    if dir_to_analyze == target_dir
      # Everything used on the disk outside the target directory; clamped so
      # a target larger than the root's "used" figure cannot yield a negative weight.
      other_space = [used.to_i - size, 0].max
      tree.push(['/', 'Other', format_gb(other_space)])
      tree.push(['/', File.basename(target_dir), size_gb])
    else
      # link this directory to its parent; a top-level directory's parent is '/'
      short_parent = File.basename(File.dirname(dir_to_analyze))
      tree.push([short_parent, File.basename(dir_to_analyze), size_gb])
    end

    child_dirs(dir_to_analyze).each { |path| analyze_dirs(path) }
    nil
  end

  # Gathers the disk figures, analyzes the target directory, formats the
  # chart data and writes the HTML report. Returns the report's file name.
  def run
    get_basic_disk_info
    analyze_dirs(target_dir)
    format_data_for_the_chart
    write_storage_report
  end

  private

  # Formats a byte count as a whole number of GB (rounded), as a string.
  def format_gb(bytes)
    format('%.0f', bytes.to_f / BYTES_PER_GB)
  end

  # The threshold as a percentage label, e.g. "5%".
  def threshold_label
    "#{format('%g', threshold_pct * 100)}%"
  end

  # Escapes text for use inside a single-quoted JavaScript string literal.
  def js_escape(text)
    text.to_s.gsub(/[\\'\n\r<]/, JS_ESCAPES)
  end

  # Parses `df` output into { mount_point => { 'capacity', 'used', 'available' } }
  # with all values in bytes. The mount-point column is located from the
  # header row because macOS prints three extra inode columns before it.
  def parse_df_output(df_output)
    header, *rows = df_output.lines.map(&:split)
    return {} if header.nil?

    block_size = df_block_size(header)
    mount_index = header.index('Mounted') || DEFAULT_MOUNT_INDEX

    rows.each_with_object({}) do |cols, mounts|
      next if cols.length <= mount_index # blank or truncated line

      # a mount point containing spaces was split into several columns
      mount_point = cols[mount_index..-1].join(' ')
      mounts[mount_point] = {
        'capacity' => cols[1].to_i * block_size,
        'used' => cols[2].to_i * block_size,
        'available' => cols[3].to_i * block_size
      }
    end
  end

  # Reads the block size in bytes from a df header column such as
  # "512-blocks", "1024-blocks" or "1K-blocks"; defaults to 1024.
  def df_block_size(header)
    match = /\A(\d+)([KMG]?)-blocks\z/i.match(header[1].to_s)
    return 1024 unless match

    match[1].to_i * DF_BLOCK_UNITS.fetch(match[2].upcase)
  end

  # Returns the size of dir in bytes as reported by `du -skx`
  # (-s: total only, -k: 1024-byte blocks, -x: stay on one filesystem).
  # The command is run without a shell so that special characters in the
  # directory name are passed through literally.
  def directory_size_bytes(dir)
    puts "\trunning du -skx \"#{dir}\""
    output = IO.popen(['du', '-skx', '--', dir]) { |io| io.read }
    output.to_s.split("\t").first.to_i * 1024
  end

  # Full paths of the non-hidden directories directly inside parent.
  # An unreadable or vanished parent is reported on STDERR and yields [].
  def child_dirs(parent)
    Dir.entries(parent)
       .reject { |name| name.start_with?('.') }
       .map { |name| File.join(parent, name) }
       .select { |path| Dir.exist?(path) }
  rescue SystemCallError => e
    warn "\tskipping #{parent}: #{e.message}"
    []
  end
end
|
320
|
+
|
321
|
+
|
322
|
+
|
323
|
+
# Command-line entry point. Interprets the first argument:
#   -h              print the usage text and return
#   -i | --install  symlink this file into /usr/local/bin and return
#   anything else   treat it as the directory to visualize
#   (no argument)   visualize the user's home directory
# then runs the visualization and prints the formatted tree.
def run
  first_arg = ARGV[0]

  if first_arg == '-h'
    StorageVisualizer.print_usage
    return
  end

  if first_arg == '-i' || first_arg == '--install'
    # install a soft link to /usr/local/bin
    source = File.expand_path(__FILE__)
    link = File.join('/usr/local/bin', File.basename(__FILE__))
    puts "Install cmd: ln -s #{source} #{link}"
    # arguments are passed separately (no shell), so paths containing
    # spaces or shell metacharacters are linked correctly
    installed = system('ln', '-s', source, link)
    warn "Install failed: could not create #{link}" unless installed
    return
  end

  # a nil argument makes the visualizer fall back to the home directory
  vs = StorageVisualizer.new(first_arg)

  puts "\nRunning visualization"
  vs.run

  puts "Formatted tree\n#{vs.tree_formatted}"
end
|
349
|
+
|
350
|
+
|
351
|
+
# Run the command-line entry point only when this file is the program being
# executed. When it is loaded through `require`, nothing runs. Basenames are
# compared rather than full paths, so a symlink to this file that keeps the
# same file name also counts as direct execution.
run if File.basename($0) == File.basename(__FILE__)
|
358
|
+
|
359
|
+
|
metadata
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: storage_visualizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Terry Case
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-05-25 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: This tool helps visualize which directories are occupying the most storage.
|
14
|
+
Any directory that occupies more than 5% of disk space is added to a visual hierarchical
|
15
|
+
storage report in the form of a Google Sankey diagram. The storage data is gathered
|
16
|
+
using the linux `du` utility. It has been tested on Mac OSX, should work on linux
|
17
|
+
systems, will not work on Windows. Run as sudo if analyzing otherwise inaccessible
|
18
|
+
directories. May take a while to run.
|
19
|
+
email: terrylcase@gmail.com
|
20
|
+
executables: []
|
21
|
+
extensions: []
|
22
|
+
extra_rdoc_files: []
|
23
|
+
files:
|
24
|
+
- lib/storage_visualizer.rb
|
25
|
+
homepage: https://github.com/teecay/StorageVisualizer
|
26
|
+
licenses:
|
27
|
+
- Creative Commons Attribution 3.0 License
|
28
|
+
metadata: {}
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
requirements: []
|
44
|
+
rubyforge_project:
|
45
|
+
rubygems_version: 2.2.2
|
46
|
+
signing_key:
|
47
|
+
specification_version: 4
|
48
|
+
summary: Creates a webpage showing which directories occupy the most storage using
|
49
|
+
a Google Sankey diagram
|
50
|
+
test_files: []
|