storage_visualizer 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/storage_visualizer.rb +359 -0
  3. metadata +50 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 194d65e776b179be3a357f52d97c0b30f5e396d5
4
+ data.tar.gz: 9299adc1d69eba26b5c9ef859f9606b5be3c7384
5
+ SHA512:
6
+ metadata.gz: 50c06dded42e0726d18e996700c1fb03f3441491a77bff6dcfb660fac4d283d5373cac51836fcaf85f6a4fdd33ea387ed6a21726ca1f6510602b203eb534a13d
7
+ data.tar.gz: 2042e22397e7be0eb2fa8b49836c0a5a24b1c4cffbbd830e8fc676ebf5f5136749a4983badb20126dfe168a10b8aa75382aaca973ffed7426e2fd4bf8fe5b250
@@ -0,0 +1,359 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'pp'
4
+ require 'yaml'
5
+ require 'date'
6
+
7
# Builds an HTML storage report (a Google Sankey diagram) showing which
# directories below a target directory occupy more than a threshold share of
# the root filesystem. Disk totals come from `df`, directory sizes from `du`.
class StorageVisualizer
  BYTES_PER_KB = 1024
  BYTES_PER_GB = 1024 * 1024 * 1024

  # Characters that must be escaped before text is placed inside a
  # single-quoted JavaScript string literal embedded in a <script> element.
  JS_ESCAPES = {
    '\\' => '\\\\',
    "'" => "\\'",
    "\n" => '\\n',
    "\r" => '\\r',
    '<' => '\\x3C'
  }.freeze

  # Static
  # Prints command-line and API usage information to STDOUT.
  def self.print_usage
    puts "\nThis tool helps visualize which directories are occupying the most storage. Any directory that occupies more than 5% of disk space is added to a visual hierarchical storage report in the form of a Google Sankey diagram. The storage data is gathered using the linux `du` utility. It has been tested on Mac OSX, should work on linux systems, will not work on Windows. Run as sudo if analyzing otherwise inaccessible directories. May take a while to run\n"
    puts "\nCommand line usage: \n\t[sudo] ./storage_visualizer.rb [directory to visualize (default ~/) | -h (help) -i | --install (install to /usr/local/bin)]\n\n"
    puts "API usage: "
    puts "\trequire 'storage_visualizer'"
    puts "\tsv = StorageVisualizer.new('[directory to visualize, ~/ by default]')"
    puts "\tsv.run()\n\n"
    puts "A report will be created in the current directory named as such: StorageReport_2015_05_25-17_19_30.html"
    puts "Status messages are printed to STDOUT"
    puts "\n\n"
  end

  # To do:
  # - Allow the threshold to be specified at construction (default is 5%)
  # - Allow output filename to be specified
  # - Enable for filesystems not mounted at the root '/'
  # - Prevent paths on the graph from crossing
  # - See if it would be cleaner to use the googlecharts gem (gem install googlecharts)
  # - Create an installer that sets up cron scheduling and add polling to the webpage
  # - What to do about directories with the same name under different parents

  # disk Bytes
  attr_accessor :capacity, :used, :available
  # disk GB for display (strings)
  attr_accessor :capacity_gb, :used_gb, :available_gb
  # other
  attr_accessor :target_dir, :tree, :tree_formatted, :diskhash, :threshold_pct

  # Constructor
  #
  # target_dir_passed - directory to analyze; the user's home directory is
  #                     used when nil.
  #
  # Raises RuntimeError when the given directory does not exist.
  def initialize(target_dir_passed = nil)
    if target_dir_passed.nil?
      # no target passed, use the user's home dir
      self.target_dir = File.expand_path('~')
    else
      expanded = File.expand_path(target_dir_passed)
      puts "Target dir: #{expanded}"
      raise "Target directory does not exist: #{expanded}" unless Dir.exist?(expanded)

      self.target_dir = expanded
    end

    self.threshold_pct = 0.05
    self.diskhash = {}
    self.tree = []
    self.tree_formatted = ''
  end

  # Renders `tree` (an array of [from, to, weight] entries) as a JavaScript
  # array literal and stores it in `tree_formatted`. Node names are escaped so
  # that quotes or backslashes in directory names cannot break the script.
  def format_data_for_the_chart
    rows = tree.map do |from, to, weight|
      "[ '#{escape_js(from)}', '#{escape_js(to)}', #{weight} ]"
    end

    # every row but the last is followed by a comma
    body = rows.empty? ? '' : "#{rows.join(",\n")}\n"
    self.tree_formatted = "[\n#{body}]\n"
  end

  # Writes the HTML report into the current directory under a timestamped
  # name and returns that file name.
  def write_storage_report
    filename = Time.now.strftime('./StorageReport_%Y_%m_%d-%H_%M_%S.html')
    puts "Writing html file #{filename}"
    # block form closes the file even when the write raises
    File.open(filename, 'w') { |file| file.write(report_html) }
    filename
  end

  # Reads capacity/used/available for every locally mounted filesystem from
  # `df -lk` into `diskhash`, copies the figures of the root filesystem into
  # the byte and GB accessors and adds the 'Free Space' entry to `tree`.
  #
  # Raises RuntimeError when `df` does not report a filesystem mounted at '/'.
  def get_basic_disk_info
    # -l: locally-mounted filesystems only, -k: report 1024-byte blocks
    diskhash.merge!(parse_df_output(`df -lk`))
    # Looks like this:
    #     {"/"=>
    #       {"capacity"=>498876809216, "used"=>434777001984, "available"=>63837663232},
    #      "/Volumes/MobileBackups"=>
    #       {"capacity"=>498876809216, "used"=>498876809216, "available"=>0}
    #     }

    root = diskhash['/']
    raise "Could not find a filesystem mounted at '/' in the df output" if root.nil?

    self.capacity = root['capacity']
    self.used = root['used']
    self.available = root['available']

    # disk totals are truncated (not rounded) to whole GB
    self.capacity_gb = gb_floor(capacity)
    self.used_gb = gb_floor(used)
    self.available_gb = gb_floor(available)

    tree.push(['/', 'Free Space', available_gb])
  end

  # Recursively measures dir_to_analyze and its visible sub-directories and
  # adds every directory whose share of the root filesystem's capacity exceeds
  # `threshold_pct` to `tree`. Sub-directories are only visited when their
  # parent exceeded the threshold. Hidden entries (leading '.') are skipped.
  def analyze_dirs(dir_to_analyze)
    # bootstrap case: '/' itself is never measured, only its children
    if dir_to_analyze == '/'
      child_directories(dir_to_analyze).each { |child| analyze_dirs(child) }
      return
    end

    puts "\trunning du -sxk \"#{dir_to_analyze}\""
    size = directory_size_bytes(dir_to_analyze)
    occupancy = size.to_f / capacity.to_f

    # only directories above the threshold are added to the tree
    return unless occupancy > threshold_pct

    size_gb = gb_rounded(size)
    occupancy_pct = format('%.0f', occupancy * 100)
    threshold_label = format('%g', threshold_pct * 100)
    puts "Dir contains more than #{threshold_label}% of disk space: #{dir_to_analyze} \n\tsize:\t#{size_gb} / \ncapacity:\t#{gb_rounded(capacity)} = #{occupancy_pct}%"

    if dir_to_analyze == target_dir
      # the target hangs directly off '/', next to everything else in use
      other_space_gb = gb_floor(used - size)
      tree.push(['/', 'Other', other_space_gb])
      tree.push(['/', File.basename(target_dir), size_gb])
    else
      # link the directory to its parent; the parent of a top-level dir is '/'
      short_parent = File.basename(File.dirname(dir_to_analyze))
      tree.push([short_parent, File.basename(dir_to_analyze), size_gb])
    end

    # run on all child dirs
    child_directories(dir_to_analyze).each { |child| analyze_dirs(child) }
  end

  # Collects the disk figures, analyzes the target directory, formats the
  # chart data and writes the report. Returns the report's file name.
  def run
    get_basic_disk_info
    analyze_dirs(target_dir)
    format_data_for_the_chart
    write_storage_report
  end

  private

  # Escapes text for use inside a single-quoted JavaScript string literal.
  def escape_js(text)
    text.to_s.gsub(/[\\'\n\r<]/) { |char| JS_ESCAPES.fetch(char) }
  end

  # Formats a byte count as whole GB, truncating the fraction.
  def gb_floor(bytes)
    format('%.0f', bytes.to_i / BYTES_PER_GB)
  end

  # Formats a byte count as whole GB, rounding to the nearest GB.
  def gb_rounded(bytes)
    format('%.0f', bytes.to_f / BYTES_PER_GB)
  end

  # Parses the output of `df -k` into
  #   { mount point => { 'capacity' => bytes, 'used' => bytes, 'available' => bytes } }.
  #
  # Columns are located relative to the percentage columns instead of by
  # fixed index, so both the BSD/macOS layout (with inode columns) and the
  # GNU/Linux layout are understood:
  #   /dev/disk1 487184384 424578764 62349620 88% 106208689 15587405 87% /
  #   /dev/sda1  487184384 424578764 62349620 88% /
  def parse_df_output(output)
    mounts = {}
    output.lines.drop(1).each do |line| # first line is the header
      cols = line.split
      first_pct = cols.index { |col| col.end_with?('%') }
      last_pct = cols.rindex { |col| col.end_with?('%') }
      # the three block counts directly precede the first percentage column
      next if first_pct.nil? || first_pct < 3

      # everything after the last percentage column is the mount point
      mount_point = cols[(last_pct + 1)..-1].join(' ')
      next if mount_point.empty?

      total, in_use, free = cols[first_pct - 3, 3].map(&:to_i)
      mounts[mount_point] = {
        'capacity' => total * BYTES_PER_KB,
        'used' => in_use * BYTES_PER_KB,
        'available' => free * BYTES_PER_KB
      }
    end
    mounts
  end

  # Returns the size in bytes that `du` reports for dir (0 when du prints
  # nothing). The command is run without a shell so that quotes, '$' or
  # backticks in directory names are passed through literally, and -k fixes
  # the block size to 1024 bytes regardless of platform defaults.
  def directory_size_bytes(dir)
    output = IO.popen(['du', '-sxk', '--', dir], &:read)
    output.to_s.strip.split("\t").first.to_i * BYTES_PER_KB
  end

  # Returns the full paths of the non-hidden sub-directories of dir. A
  # directory that cannot be listed is reported on STDERR and treated as
  # having no children instead of aborting the whole run.
  def child_directories(dir)
    visible = Dir.entries(dir).reject { |name| name.start_with?('.') }
    visible.map { |name| File.join(dir, name) }.select { |path| Dir.exist?(path) }
  rescue SystemCallError => e
    warn "\tskipping unreadable directory #{dir}: #{e.message}"
    []
  end

  # Returns the complete HTML document of the report. Values that have not
  # been computed yet are rendered as empty strings.
  def report_html
    <<-HTML
<html>
<body>
<script type="text/javascript"
  src="https://www.google.com/jsapi?autoload={'modules':[{'name':'visualization','version':'1.1','packages':['sankey']}]}">
</script>

<style>
  td
  {
    font-family:sans-serif;
    font-size:8pt;
  }

  .bigger
  {
    font-family:sans-serif;
    font-size:10pt;
    font-weight:bold
  }

</style>

<div class="table">
  <div class="bigger">Storage Report</div>
  <table>
    <tr>
      <td style="text-align:right">Disk Capacity:</td><td>#{capacity_gb} GB</td>
    </tr>
    <tr>
      <td style="text-align:right">Disk Used:</td><td>#{used_gb} GB</td>
    </tr>
    <tr>
      <td style="text-align:right">Free Space:</td><td>#{available_gb} GB</td>
    </tr>
  </table>

</div>


<div id="sankey_multiple" style="width: 900px; height: 300px;"></div>

<script type="text/javascript">

  google.setOnLoadCallback(drawChart);
  function drawChart() {
    var data = new google.visualization.DataTable();
    data.addColumn('string', 'From');
    data.addColumn('string', 'To');
    data.addColumn('number', 'Weight');
    data.addRows( #{tree_formatted});

    // Set chart options
    var options = {

      width: 1000,
      sankey: {
        iterations: 32,
        node: { label: { fontName: 'Arial',
                         fontSize: 10,
                         color: '#871b47',
                         bold: false,
                         italic: true } } },
    };



    // Instantiate and draw our chart, passing in some options.
    var chart = new google.visualization.Sankey(document.getElementById('sankey_multiple'));
    chart.draw(data, options);
  }


</script>
</body>
</html>
    HTML
  end
end
320
+
321
+
322
+
323
# Command-line entry point.
#
# Reads the first command-line argument:
#   -h              prints the usage text and returns
#   -i | --install  creates a symbolic link to this file in /usr/local/bin
#                   and returns
#   anything else   is taken as the directory to visualize
# Without an argument the user's home directory is visualized.
def run
  case ARGV[0]
  when '-h'
    StorageVisualizer.print_usage
    return
  when '-i', '--install'
    # install a soft link to /usr/local/bin
    source = File.expand_path(__FILE__)
    link = File.join('/usr/local/bin', File.basename(__FILE__))
    puts "Install cmd: ln -s #{source} #{link}"
    begin
      # File.symlink avoids the shell, so paths containing spaces or shell
      # metacharacters are handled correctly
      File.symlink(source, link)
    rescue SystemCallError => e
      warn "Install failed: #{e.message}"
    end
    return
  end

  # a nil argument makes the visualizer fall back to the home directory
  vs = StorageVisualizer.new(ARGV[0])

  puts "\nRunning visualization"
  vs.run

  puts "Formatted tree\n#{vs.tree_formatted}"
end
349
+
350
+
351
# Start the command-line entry point only when this file is the script being
# executed; when it is loaded as a library (API usage) nothing is run.
run if File.basename($0) == File.basename(__FILE__)
358
+
359
+
metadata ADDED
@@ -0,0 +1,50 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: storage_visualizer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Terry Case
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-25 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: This tool helps visualize which directories are occupying the most storage.
14
+ Any directory that occupies more than 5% of disk space is added to a visual hierarchical
15
+ storage report in the form of a Google Sankey diagram. The storage data is gathered
16
+ using the linux `du` utility. It has been tested on Mac OSX, should work on linux
17
+ systems, will not work on Windows. Run as sudo if analyzing otherwise inaccessible
18
+ directories. May take a while to run.
19
+ email: terrylcase@gmail.com
20
+ executables: []
21
+ extensions: []
22
+ extra_rdoc_files: []
23
+ files:
24
+ - lib/storage_visualizer.rb
25
+ homepage: https://github.com/teecay/StorageVisualizer
26
+ licenses:
27
+ - Creative Commons Attribution 3.0 License
28
+ metadata: {}
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ required_rubygems_version: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ requirements: []
44
+ rubyforge_project:
45
+ rubygems_version: 2.2.2
46
+ signing_key:
47
+ specification_version: 4
48
+ summary: Creates a webpage showing which directories occupy the most storage using
49
+ a Google Sankey diagram
50
+ test_files: []