storage_visualizer 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/storage_visualizer.rb +183 -76
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dafd6c489f0809c633d222e2f2e46a1f5f1f087c
|
4
|
+
data.tar.gz: 6c6b72560460adb5b4c13398fa26adc9b209e85b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 496b3d800a10b5331980c9acd22e2e416f27ac628f2b260a17b723edde95845df949829f10fe99b8daac803a323a134ad86729e09ff2815f16e6a0f8819e5662
|
7
|
+
data.tar.gz: 0fc8ac60e6df0ecd7a5566fbda1e136e6e497a77843a55b26e7997a9e81f13768d47d3dd96ac8eb819d37d73db54fb4cc2ad7576c4f5b11f8157a97765edf722
|
data/lib/storage_visualizer.rb
CHANGED
@@ -20,6 +20,30 @@
|
|
20
20
|
require 'pp'
|
21
21
|
require 'yaml'
|
22
22
|
require 'date'
|
23
|
+
require 'uri'
|
24
|
+
require 'cgi'
|
25
|
+
require 'json'
|
26
|
+
|
27
|
+
|
28
|
+
class DirNode
|
29
|
+
attr_accessor :parent
|
30
|
+
attr_accessor :dir_name
|
31
|
+
attr_accessor :dir_short
|
32
|
+
attr_accessor :size_gb
|
33
|
+
attr_accessor :children
|
34
|
+
|
35
|
+
def initialize(parent_in, dir_name_in, dir_short_in, size_gb_in)
|
36
|
+
self.parent = parent_in
|
37
|
+
self.dir_name = dir_name_in
|
38
|
+
self.dir_short = dir_short_in
|
39
|
+
self.size_gb = size_gb_in
|
40
|
+
self.children = []
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
|
46
|
+
|
23
47
|
|
24
48
|
class StorageVisualizer
|
25
49
|
|
@@ -85,16 +109,15 @@ class StorageVisualizer
|
|
85
109
|
|
86
110
|
|
87
111
|
# To do:
|
88
|
-
# x Make it work on linux
|
112
|
+
# x Make it work on mac & linux (CentOS & Ubuntu)
|
89
113
|
# x Specify blocksize and do not assume 512 bytes (use the -k flag, which reports blocks as KB)
|
90
|
-
#
|
114
|
+
# x Enable for filesystems not mounted at the root '/'
|
91
115
|
# - Allow the threshold to be specified (default is 5%)
|
92
116
|
# - Allow output filename to be specified
|
93
117
|
# Maybe:
|
94
|
-
#
|
118
|
+
# x Prevent paths on the graph from crossing (dirs with the same name become the same node)
|
95
119
|
# - See if it would be cleaner to use the googlecharts gem (gem install googlecharts)
|
96
|
-
|
97
|
-
# - What to do about directories with the same name under different parents
|
120
|
+
|
98
121
|
|
99
122
|
# disk Bytes
|
100
123
|
attr_accessor :capacity
|
@@ -110,13 +133,18 @@ class StorageVisualizer
|
|
110
133
|
attr_accessor :tree_formatted
|
111
134
|
attr_accessor :diskhash
|
112
135
|
attr_accessor :threshold_pct
|
136
|
+
attr_accessor :target_volume
|
137
|
+
attr_accessor :dupe_counter
|
138
|
+
|
139
|
+
# this is the root DirNode object
|
140
|
+
attr_accessor :dir_tree
|
113
141
|
|
114
142
|
# Constructor
|
115
143
|
def initialize(target_dir_passed = nil)
|
116
144
|
|
117
145
|
if (target_dir_passed != nil)
|
118
146
|
expanded = File.expand_path(target_dir_passed)
|
119
|
-
puts "Target dir: #{expanded}"
|
147
|
+
# puts "Target dir: #{expanded}"
|
120
148
|
if (Dir.exist?(expanded))
|
121
149
|
self.target_dir = expanded
|
122
150
|
else
|
@@ -127,25 +155,44 @@ class StorageVisualizer
|
|
127
155
|
self.target_dir = File.expand_path('~')
|
128
156
|
end
|
129
157
|
|
130
|
-
|
158
|
+
# how much space is considered worthy of noting on the chart
|
131
159
|
self.threshold_pct = 0.05
|
132
160
|
self.diskhash = {}
|
133
161
|
self.tree = []
|
134
162
|
self.tree_formatted = ''
|
163
|
+
self.dupe_counter = 0
|
135
164
|
end
|
136
165
|
|
137
166
|
|
138
167
|
|
139
168
|
def format_data_for_the_chart
|
140
|
-
|
169
|
+
|
170
|
+
# Build the list of nodes
|
171
|
+
nodes = []
|
172
|
+
nodes.push(self.dir_tree)
|
173
|
+
comparison_list = []
|
174
|
+
while true
|
175
|
+
if (nodes.length == 0)
|
176
|
+
break
|
177
|
+
end
|
178
|
+
node = nodes.shift
|
179
|
+
comparison_list.push(node)
|
180
|
+
nodes.concat(node.children)
|
181
|
+
end
|
141
182
|
|
142
|
-
|
143
|
-
|
183
|
+
|
184
|
+
# format the data for the chart
|
185
|
+
working_string = "[\n"
|
186
|
+
comparison_list.each_with_index do |entry, index|
|
187
|
+
if (entry.parent == nil)
|
188
|
+
next
|
189
|
+
end
|
190
|
+
if(index == comparison_list.length - 1)
|
144
191
|
# this is the last element, it gets no comma
|
145
|
-
working_string << "[ '#{entry
|
192
|
+
working_string << "[ '#{entry.parent.dir_short}', '#{entry.dir_short}', #{entry.size_gb} ]\n"
|
146
193
|
else
|
147
194
|
# mind the comma
|
148
|
-
working_string << "[ '#{entry
|
195
|
+
working_string << "[ '#{entry.parent.dir_short}', '#{entry.dir_short}', #{entry.size_gb} ],\n"
|
149
196
|
end
|
150
197
|
end
|
151
198
|
working_string << "]\n"
|
@@ -154,6 +201,7 @@ class StorageVisualizer
|
|
154
201
|
end
|
155
202
|
|
156
203
|
|
204
|
+
|
157
205
|
def write_storage_report
|
158
206
|
|
159
207
|
the_html = %q|<html>
|
@@ -204,7 +252,7 @@ class StorageVisualizer
|
|
204
252
|
var data = new google.visualization.DataTable();
|
205
253
|
data.addColumn('string', 'From');
|
206
254
|
data.addColumn('string', 'To');
|
207
|
-
data.addColumn('number', '
|
255
|
+
data.addColumn('number', 'Size (GB)');
|
208
256
|
data.addRows( | + self.tree_formatted + %q|);
|
209
257
|
|
210
258
|
// Set chart options
|
@@ -213,11 +261,12 @@ class StorageVisualizer
|
|
213
261
|
width: 1000,
|
214
262
|
sankey: {
|
215
263
|
iterations: 32,
|
216
|
-
node: { label: {
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
264
|
+
node: { label: {
|
265
|
+
fontName: 'Arial',
|
266
|
+
fontSize: 10,
|
267
|
+
color: '#871b47',
|
268
|
+
bold: false,
|
269
|
+
italic: true } } },
|
221
270
|
};
|
222
271
|
|
223
272
|
|
@@ -246,7 +295,7 @@ class StorageVisualizer
|
|
246
295
|
# df -l gets info about locally-mounted filesystems
|
247
296
|
output = `df -lk`
|
248
297
|
|
249
|
-
# OSX
|
298
|
+
# OSX:
|
250
299
|
# Filesystem 1024-blocks Used Available Capacity iused ifree %iused Mounted on
|
251
300
|
# /dev/disk1 975912960 349150592 626506368 36% 87351646 156626592 36% /
|
252
301
|
# localhost:/QwnJE6UBvlR1EvqouX6gMM 975912960 975912960 0 100% 0 0 100% /Volumes/MobileBackups
|
@@ -275,8 +324,10 @@ class StorageVisualizer
|
|
275
324
|
# {"capacity"=>498876809216, "used"=>498876809216, "available"=>0}
|
276
325
|
# }
|
277
326
|
|
327
|
+
# get each mount's capacity & utilization
|
278
328
|
output.lines.each_with_index do |line, index|
|
279
329
|
if (index == 0)
|
330
|
+
# skip the header line
|
280
331
|
next
|
281
332
|
end
|
282
333
|
cols = line.split
|
@@ -304,45 +355,62 @@ class StorageVisualizer
|
|
304
355
|
|
305
356
|
# puts "Disk mount info:"
|
306
357
|
# pp diskhash
|
307
|
-
self.capacity = self.diskhash['/']['capacity']
|
308
|
-
self.used = self.diskhash['/']['used']
|
309
|
-
self.available = self.diskhash['/']['available']
|
310
358
|
|
311
359
|
|
360
|
+
# find the (self.)target_volume
|
361
|
+
# look through diskhash keys, to find the one that most matches target_dir
|
362
|
+
val_of_min = 1000
|
363
|
+
# puts "Determining which volume contains the target directory.."
|
364
|
+
self.diskhash.keys.each do |volume|
|
365
|
+
result = self.target_dir.gsub(volume, '')
|
366
|
+
diskhash['match_amt'] = result.length
|
367
|
+
# puts "Considering:\t#{volume}, \t closeness: #{result.length}, \t (#{result})"
|
368
|
+
if (result.length < val_of_min)
|
369
|
+
# puts "Candidate: #{volume}"
|
370
|
+
val_of_min = result.length
|
371
|
+
self.target_volume = volume
|
372
|
+
end
|
373
|
+
end
|
374
|
+
|
375
|
+
puts "Target volume is #{self.target_volume}"
|
376
|
+
|
312
377
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
self.tree.push(free_space_array)
|
378
|
+
self.capacity = self.diskhash[self.target_volume]['capacity']
|
379
|
+
self.used = self.diskhash[self.target_volume]['used']
|
380
|
+
self.available = self.diskhash[self.target_volume]['available']
|
317
381
|
|
318
382
|
self.capacity_gb = "#{'%.0f' % (self.capacity.to_i / 1024 / 1024)}"
|
319
383
|
self.used_gb = "#{'%.0f' % (self.used.to_i / 1024 / 1024)}"
|
320
384
|
self.available_gb = "#{'%.0f' % (self.available.to_i / 1024 / 1024)}"
|
321
385
|
|
322
|
-
|
386
|
+
self.dir_tree = DirNode.new(nil, self.target_volume, self.target_volume, self.capacity)
|
387
|
+
self.dir_tree.children.push(DirNode.new(self.dir_tree, 'Free Space', 'Free Space', self.available_gb))
|
323
388
|
|
324
389
|
end
|
325
390
|
|
326
391
|
|
327
|
-
|
392
|
+
# Crawl the dirs recursively, beginning with the target dir
|
393
|
+
def analyze_dirs(dir_to_analyze, parent)
|
328
394
|
|
329
|
-
# bootstrap case
|
330
|
-
if (dir_to_analyze == '/')
|
331
395
|
|
332
|
-
|
396
|
+
# bootstrap case
|
397
|
+
# don't create an entry for the root because there's nothing to link to yet, scan the subdirs
|
398
|
+
if (dir_to_analyze == self.target_volume)
|
399
|
+
# puts "Dir to analyze is the target volume"
|
400
|
+
# run on all child dirs, not this dir
|
333
401
|
Dir.entries(dir_to_analyze).reject {|d| d.start_with?('.')}.each do |name|
|
334
402
|
# puts "\tentry: >#{file}<"
|
335
403
|
full_path = File.join(dir_to_analyze, name)
|
336
|
-
if (Dir.exist?(full_path))
|
404
|
+
if (Dir.exist?(full_path) && !File.symlink?(full_path))
|
337
405
|
# puts "Contender: >#{full_path}<"
|
338
|
-
analyze_dirs(full_path)
|
406
|
+
analyze_dirs(full_path, self.dir_tree)
|
339
407
|
end
|
340
408
|
end
|
341
409
|
return
|
342
410
|
end
|
343
411
|
|
344
|
-
|
345
|
-
cmd = "du -
|
412
|
+
# use "P" to help prevent following any symlinks
|
413
|
+
cmd = "du -sxkP \"#{dir_to_analyze}\""
|
346
414
|
puts "\trunning #{cmd}"
|
347
415
|
output = `#{cmd}`.strip().split("\t")
|
348
416
|
# puts "Du output:"
|
@@ -350,69 +418,108 @@ class StorageVisualizer
|
|
350
418
|
size = output[0].to_i
|
351
419
|
size_gb = "#{'%.0f' % (size.to_f / 1024 / 1024)}"
|
352
420
|
# puts "Size: #{size}\nCapacity: #{self.diskhash['/']['capacity']}"
|
353
|
-
|
354
|
-
|
421
|
+
|
422
|
+
# Occupancy as a fraction of total space
|
423
|
+
# occupancy = (size.to_f / self.capacity.to_f)
|
424
|
+
|
425
|
+
# Occupancy as a fraction of USED space
|
426
|
+
occupancy = (size.to_f / self.used.to_f)
|
427
|
+
|
355
428
|
occupancy_pct = "#{'%.0f' % (occupancy * 100)}"
|
356
|
-
|
357
429
|
capacity_gb = "#{'%.0f' % (self.capacity.to_f / 1024 / 1024)}"
|
358
430
|
|
359
431
|
# if this dir contains more than 5% of disk space, add it to the tree
|
432
|
+
|
433
|
+
if (dir_to_analyze == self.target_dir)
|
434
|
+
# puts "Dir to analyze is the target dir, space used outside this dir.."
|
435
|
+
# account for space used outside of target dir
|
436
|
+
other_space = self.used - size
|
437
|
+
other_space_gb = "#{'%.0f' % (other_space / 1024 / 1024)}"
|
438
|
+
parent.children.push(DirNode.new(parent, self.target_volume, self.target_volume, other_space_gb))
|
439
|
+
end
|
360
440
|
|
361
441
|
|
362
442
|
if (occupancy > self.threshold_pct)
|
363
|
-
puts "Dir contains more than 5% of disk space: #{dir_to_analyze} \n\tsize:\t#{size_gb} / \ncapacity:\t#{capacity_gb} = #{occupancy_pct}%"
|
364
|
-
#
|
365
|
-
|
366
|
-
if (dir_to_analyze == self.target_dir)
|
367
|
-
|
368
|
-
other_space = self.used - size
|
369
|
-
other_space_gb = "#{'%.0f' % (other_space / 1024 / 1024)}"
|
370
|
-
other_space_array = ['/', 'Other', other_space_gb]
|
371
|
-
|
372
|
-
short_target_dir = self.target_dir.split('/').reverse[0]
|
373
|
-
short_target_dir = (short_target_dir == nil) ? self.target_dir : short_target_dir
|
374
|
-
|
375
|
-
comparison = ['/', short_target_dir, size_gb]
|
376
|
-
|
377
|
-
# add them to the array
|
378
|
-
self.tree.push(other_space_array)
|
379
|
-
self.tree.push(comparison)
|
443
|
+
# puts "Dir contains more than 5% of disk space: #{dir_to_analyze} \n\tsize:\t#{size_gb} / \ncapacity:\t#{capacity_gb} = #{occupancy_pct}%"
|
444
|
+
puts "Dir contains more than 5% of used disk space: #{dir_to_analyze} \n\tsize:\t\t#{size_gb} / \n\toccupancy:\t#{self.used_gb} = #{occupancy_pct}% of used space"
|
380
445
|
|
446
|
+
# puts "Dir to analyze (#{dir_to_analyze}) is not the target dir (#{self.target_dir})"
|
447
|
+
dirs = dir_to_analyze.split('/')
|
448
|
+
|
449
|
+
short_dir = dirs.pop().gsub("'","\\\\'")
|
450
|
+
full_parent = dirs.join('/')
|
451
|
+
if (dir_to_analyze == self.target_dir || full_parent == self.target_volume)
|
452
|
+
# puts "Either this dir is the target dir, or the parent is the target volume, make parent the full target volume"
|
453
|
+
short_parent = self.target_volume.gsub("'","\\\\'")
|
381
454
|
else
|
382
|
-
#
|
383
|
-
short_parent =
|
384
|
-
|
385
|
-
# short_parent = (short_parent == nil) ? parent : short_parent
|
386
|
-
# case for when parent is '/'
|
387
|
-
short_parent = (short_parent == '') ? '/' : short_parent
|
388
|
-
|
389
|
-
short_dir = dir_to_analyze.split('/').reverse[0]
|
390
|
-
|
391
|
-
# array_to_push = [parent, dir_to_analyze, size_gb]
|
392
|
-
array_to_push = [short_parent, short_dir, size_gb]
|
393
|
-
self.tree.push(array_to_push)
|
455
|
+
# puts "Neither this dir or parent is the target dir, making parent short"
|
456
|
+
short_parent = dirs.pop().gsub("'","\\\\'")
|
394
457
|
end
|
458
|
+
|
459
|
+
|
460
|
+
this_node = DirNode.new(parent, dir_to_analyze, short_dir, size_gb)
|
461
|
+
parent.children.push(this_node)
|
395
462
|
|
396
463
|
# run on all child dirs
|
397
464
|
Dir.entries(dir_to_analyze).reject {|d| d.start_with?('.')}.each do |name|
|
398
|
-
# puts "\tentry: >#{file}<"
|
399
|
-
|
400
465
|
full_path = File.join(dir_to_analyze, name)
|
401
|
-
|
402
|
-
if (Dir.exist?(full_path))
|
466
|
+
# don't follow any symlinks
|
467
|
+
if (Dir.exist?(full_path) && !File.symlink?(full_path))
|
403
468
|
# puts "Contender: >#{full_path}<"
|
404
|
-
analyze_dirs(full_path)
|
469
|
+
analyze_dirs(full_path, this_node)
|
405
470
|
end
|
406
471
|
end
|
407
472
|
|
408
|
-
end
|
473
|
+
end # occupancy > threshold
|
474
|
+
|
475
|
+
end # function
|
476
|
+
|
477
|
+
|
478
|
+
|
479
|
+
def traverse_tree_and_remove_duplicates
|
480
|
+
puts "\nHandling duplicate entries.."
|
481
|
+
nodes = []
|
482
|
+
nodes.push(self.dir_tree)
|
483
|
+
comparison_list = []
|
484
|
+
while true
|
485
|
+
if (nodes.length == 0)
|
486
|
+
break
|
487
|
+
end
|
409
488
|
|
489
|
+
node = nodes.shift
|
490
|
+
comparison_list.push(node)
|
491
|
+
# pp node
|
492
|
+
if node.parent == nil
|
493
|
+
# puts "\tparent: no parent \n\tdir: #{node.dir_name} \n\tshort: #{node.dir_short} \n\tsize: #{node.size_gb}"
|
494
|
+
else
|
495
|
+
# puts "\tparent: #{node.parent.dir_short.to_s} \n\tdir: #{node.dir_name} \n\tshort: #{node.dir_short} \n\tsize: #{node.size_gb}"
|
496
|
+
end
|
497
|
+
nodes.concat(node.children)
|
498
|
+
end
|
499
|
+
# puts "Done building node list"
|
500
|
+
|
501
|
+
|
502
|
+
|
503
|
+
for i in 0..comparison_list.length do
|
504
|
+
for j in 0..comparison_list.length do
|
505
|
+
if (comparison_list[i] != nil && comparison_list[j] != nil)
|
506
|
+
if (i != j && comparison_list[i].dir_short == comparison_list[j].dir_short)
|
507
|
+
puts "\t#{comparison_list[i].dir_short} is the same as #{comparison_list[j].dir_short}, changing to #{comparison_list[j].dir_short}*"
|
508
|
+
comparison_list[j].dir_short = "#{comparison_list[j].dir_short}*"
|
509
|
+
end
|
510
|
+
end
|
511
|
+
end
|
512
|
+
end
|
513
|
+
puts "Duplicate handling complete"
|
514
|
+
|
410
515
|
end
|
411
516
|
|
517
|
+
|
412
518
|
|
413
519
|
def run
|
414
520
|
self.get_basic_disk_info
|
415
|
-
self.analyze_dirs(self.target_dir)
|
521
|
+
self.analyze_dirs(self.target_dir, self.dir_tree)
|
522
|
+
self.traverse_tree_and_remove_duplicates
|
416
523
|
self.format_data_for_the_chart
|
417
524
|
self.write_storage_report
|
418
525
|
|
@@ -438,12 +545,12 @@ def run
|
|
438
545
|
vs = StorageVisualizer.new()
|
439
546
|
end
|
440
547
|
|
441
|
-
puts "\nRunning visualization"
|
548
|
+
# puts "\nRunning visualization"
|
442
549
|
vs.run()
|
443
550
|
|
444
551
|
# puts "dumping tree: "
|
445
552
|
# pp vs.tree
|
446
|
-
puts "Formatted tree\n#{vs.tree_formatted}"
|
553
|
+
# puts "Formatted tree\n#{vs.tree_formatted}"
|
447
554
|
|
448
555
|
end
|
449
556
|
|
metadata
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: storage_visualizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Terry Case
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This tool helps visualize which directories are occupying the most storage.
|
14
|
-
Any directory that occupies more than 5% of disk space is added to a visual
|
15
|
-
storage report in the form of a Google Sankey diagram. The storage
|
16
|
-
using the linux `du` utility. It has been tested on Mac OSX
|
17
|
-
systems, will not work on Windows. Run as sudo if analyzing
|
18
|
-
directories. May take a while to run.
|
14
|
+
Any directory that occupies more than 5% of used disk space is added to a visual
|
15
|
+
hierarchical storage report in the form of a Google Sankey diagram. The storage
|
16
|
+
data is gathered using the linux `du` utility. It has been tested on Mac OSX and
|
17
|
+
linux systems (Ubuntu & CentOS), will not work on Windows. Run as sudo if analyzing
|
18
|
+
otherwise inaccessible directories. May take a while to run.
|
19
19
|
email: terrylcase@gmail.com
|
20
20
|
executables: []
|
21
21
|
extensions: []
|