storage_visualizer 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/storage_visualizer.rb +183 -76
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dafd6c489f0809c633d222e2f2e46a1f5f1f087c
|
4
|
+
data.tar.gz: 6c6b72560460adb5b4c13398fa26adc9b209e85b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 496b3d800a10b5331980c9acd22e2e416f27ac628f2b260a17b723edde95845df949829f10fe99b8daac803a323a134ad86729e09ff2815f16e6a0f8819e5662
|
7
|
+
data.tar.gz: 0fc8ac60e6df0ecd7a5566fbda1e136e6e497a77843a55b26e7997a9e81f13768d47d3dd96ac8eb819d37d73db54fb4cc2ad7576c4f5b11f8157a97765edf722
|
data/lib/storage_visualizer.rb
CHANGED
@@ -20,6 +20,30 @@
|
|
20
20
|
require 'pp'
|
21
21
|
require 'yaml'
|
22
22
|
require 'date'
|
23
|
+
require 'uri'
|
24
|
+
require 'cgi'
|
25
|
+
require 'json'
|
26
|
+
|
27
|
+
|
28
|
+
class DirNode
|
29
|
+
attr_accessor :parent
|
30
|
+
attr_accessor :dir_name
|
31
|
+
attr_accessor :dir_short
|
32
|
+
attr_accessor :size_gb
|
33
|
+
attr_accessor :children
|
34
|
+
|
35
|
+
def initialize(parent_in, dir_name_in, dir_short_in, size_gb_in)
|
36
|
+
self.parent = parent_in
|
37
|
+
self.dir_name = dir_name_in
|
38
|
+
self.dir_short = dir_short_in
|
39
|
+
self.size_gb = size_gb_in
|
40
|
+
self.children = []
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
|
46
|
+
|
23
47
|
|
24
48
|
class StorageVisualizer
|
25
49
|
|
@@ -85,16 +109,15 @@ class StorageVisualizer
|
|
85
109
|
|
86
110
|
|
87
111
|
# To do:
|
88
|
-
# x Make it work on linux
|
112
|
+
# x Make it work on mac & linux (CentOS & Ubuntu)
|
89
113
|
# x Specify blocksize and do not assume 512 bytes (use the -k flag, which reports blocks as KB)
|
90
|
-
#
|
114
|
+
# x Enable for filesystems not mounted at the root '/'
|
91
115
|
# - Allow the threshold to be specified (default is 5%)
|
92
116
|
# - Allow output filename to be specified
|
93
117
|
# Maybe:
|
94
|
-
#
|
118
|
+
# x Prevent paths on the graph from crossing (dirs with the same name become the same node)
|
95
119
|
# - See if it would be cleaner to use the googlecharts gem (gem install googlecharts)
|
96
|
-
|
97
|
-
# - What to do about directories with the same name under different parents
|
120
|
+
|
98
121
|
|
99
122
|
# disk Bytes
|
100
123
|
attr_accessor :capacity
|
@@ -110,13 +133,18 @@ class StorageVisualizer
|
|
110
133
|
attr_accessor :tree_formatted
|
111
134
|
attr_accessor :diskhash
|
112
135
|
attr_accessor :threshold_pct
|
136
|
+
attr_accessor :target_volume
|
137
|
+
attr_accessor :dupe_counter
|
138
|
+
|
139
|
+
# this is the root DirNode object
|
140
|
+
attr_accessor :dir_tree
|
113
141
|
|
114
142
|
# Constructor
|
115
143
|
def initialize(target_dir_passed = nil)
|
116
144
|
|
117
145
|
if (target_dir_passed != nil)
|
118
146
|
expanded = File.expand_path(target_dir_passed)
|
119
|
-
puts "Target dir: #{expanded}"
|
147
|
+
# puts "Target dir: #{expanded}"
|
120
148
|
if (Dir.exist?(expanded))
|
121
149
|
self.target_dir = expanded
|
122
150
|
else
|
@@ -127,25 +155,44 @@ class StorageVisualizer
|
|
127
155
|
self.target_dir = File.expand_path('~')
|
128
156
|
end
|
129
157
|
|
130
|
-
|
158
|
+
# how much space is considered worthy of noting on the chart
|
131
159
|
self.threshold_pct = 0.05
|
132
160
|
self.diskhash = {}
|
133
161
|
self.tree = []
|
134
162
|
self.tree_formatted = ''
|
163
|
+
self.dupe_counter = 0
|
135
164
|
end
|
136
165
|
|
137
166
|
|
138
167
|
|
139
168
|
def format_data_for_the_chart
|
140
|
-
|
169
|
+
|
170
|
+
# Build the list of nodes
|
171
|
+
nodes = []
|
172
|
+
nodes.push(self.dir_tree)
|
173
|
+
comparison_list = []
|
174
|
+
while true
|
175
|
+
if (nodes.length == 0)
|
176
|
+
break
|
177
|
+
end
|
178
|
+
node = nodes.shift
|
179
|
+
comparison_list.push(node)
|
180
|
+
nodes.concat(node.children)
|
181
|
+
end
|
141
182
|
|
142
|
-
|
143
|
-
|
183
|
+
|
184
|
+
# format the data for the chart
|
185
|
+
working_string = "[\n"
|
186
|
+
comparison_list.each_with_index do |entry, index|
|
187
|
+
if (entry.parent == nil)
|
188
|
+
next
|
189
|
+
end
|
190
|
+
if(index == comparison_list.length - 1)
|
144
191
|
# this is the last element, it gets no comma
|
145
|
-
working_string << "[ '#{entry
|
192
|
+
working_string << "[ '#{entry.parent.dir_short}', '#{entry.dir_short}', #{entry.size_gb} ]\n"
|
146
193
|
else
|
147
194
|
# mind the comma
|
148
|
-
working_string << "[ '#{entry
|
195
|
+
working_string << "[ '#{entry.parent.dir_short}', '#{entry.dir_short}', #{entry.size_gb} ],\n"
|
149
196
|
end
|
150
197
|
end
|
151
198
|
working_string << "]\n"
|
@@ -154,6 +201,7 @@ class StorageVisualizer
|
|
154
201
|
end
|
155
202
|
|
156
203
|
|
204
|
+
|
157
205
|
def write_storage_report
|
158
206
|
|
159
207
|
the_html = %q|<html>
|
@@ -204,7 +252,7 @@ class StorageVisualizer
|
|
204
252
|
var data = new google.visualization.DataTable();
|
205
253
|
data.addColumn('string', 'From');
|
206
254
|
data.addColumn('string', 'To');
|
207
|
-
data.addColumn('number', '
|
255
|
+
data.addColumn('number', 'Size (GB)');
|
208
256
|
data.addRows( | + self.tree_formatted + %q|);
|
209
257
|
|
210
258
|
// Set chart options
|
@@ -213,11 +261,12 @@ class StorageVisualizer
|
|
213
261
|
width: 1000,
|
214
262
|
sankey: {
|
215
263
|
iterations: 32,
|
216
|
-
node: { label: {
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
264
|
+
node: { label: {
|
265
|
+
fontName: 'Arial',
|
266
|
+
fontSize: 10,
|
267
|
+
color: '#871b47',
|
268
|
+
bold: false,
|
269
|
+
italic: true } } },
|
221
270
|
};
|
222
271
|
|
223
272
|
|
@@ -246,7 +295,7 @@ class StorageVisualizer
|
|
246
295
|
# df -l gets info about locally-mounted filesystems
|
247
296
|
output = `df -lk`
|
248
297
|
|
249
|
-
# OSX
|
298
|
+
# OSX:
|
250
299
|
# Filesystem 1024-blocks Used Available Capacity iused ifree %iused Mounted on
|
251
300
|
# /dev/disk1 975912960 349150592 626506368 36% 87351646 156626592 36% /
|
252
301
|
# localhost:/QwnJE6UBvlR1EvqouX6gMM 975912960 975912960 0 100% 0 0 100% /Volumes/MobileBackups
|
@@ -275,8 +324,10 @@ class StorageVisualizer
|
|
275
324
|
# {"capacity"=>498876809216, "used"=>498876809216, "available"=>0}
|
276
325
|
# }
|
277
326
|
|
327
|
+
# get each mount's capacity & utilization
|
278
328
|
output.lines.each_with_index do |line, index|
|
279
329
|
if (index == 0)
|
330
|
+
# skip the header line
|
280
331
|
next
|
281
332
|
end
|
282
333
|
cols = line.split
|
@@ -304,45 +355,62 @@ class StorageVisualizer
|
|
304
355
|
|
305
356
|
# puts "Disk mount info:"
|
306
357
|
# pp diskhash
|
307
|
-
self.capacity = self.diskhash['/']['capacity']
|
308
|
-
self.used = self.diskhash['/']['used']
|
309
|
-
self.available = self.diskhash['/']['available']
|
310
358
|
|
311
359
|
|
360
|
+
# find the (self.)target_volume
|
361
|
+
# look through diskhash keys, to find the one that most matches target_dir
|
362
|
+
val_of_min = 1000
|
363
|
+
# puts "Determining which volume contains the target directory.."
|
364
|
+
self.diskhash.keys.each do |volume|
|
365
|
+
result = self.target_dir.gsub(volume, '')
|
366
|
+
diskhash['match_amt'] = result.length
|
367
|
+
# puts "Considering:\t#{volume}, \t closeness: #{result.length}, \t (#{result})"
|
368
|
+
if (result.length < val_of_min)
|
369
|
+
# puts "Candidate: #{volume}"
|
370
|
+
val_of_min = result.length
|
371
|
+
self.target_volume = volume
|
372
|
+
end
|
373
|
+
end
|
374
|
+
|
375
|
+
puts "Target volume is #{self.target_volume}"
|
376
|
+
|
312
377
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
self.tree.push(free_space_array)
|
378
|
+
self.capacity = self.diskhash[self.target_volume]['capacity']
|
379
|
+
self.used = self.diskhash[self.target_volume]['used']
|
380
|
+
self.available = self.diskhash[self.target_volume]['available']
|
317
381
|
|
318
382
|
self.capacity_gb = "#{'%.0f' % (self.capacity.to_i / 1024 / 1024)}"
|
319
383
|
self.used_gb = "#{'%.0f' % (self.used.to_i / 1024 / 1024)}"
|
320
384
|
self.available_gb = "#{'%.0f' % (self.available.to_i / 1024 / 1024)}"
|
321
385
|
|
322
|
-
|
386
|
+
self.dir_tree = DirNode.new(nil, self.target_volume, self.target_volume, self.capacity)
|
387
|
+
self.dir_tree.children.push(DirNode.new(self.dir_tree, 'Free Space', 'Free Space', self.available_gb))
|
323
388
|
|
324
389
|
end
|
325
390
|
|
326
391
|
|
327
|
-
|
392
|
+
# Crawl the dirs recursively, beginning with the target dir
|
393
|
+
def analyze_dirs(dir_to_analyze, parent)
|
328
394
|
|
329
|
-
# bootstrap case
|
330
|
-
if (dir_to_analyze == '/')
|
331
395
|
|
332
|
-
|
396
|
+
# bootstrap case
|
397
|
+
# don't create an entry for the root because there's nothing to link to yet, scan the subdirs
|
398
|
+
if (dir_to_analyze == self.target_volume)
|
399
|
+
# puts "Dir to analyze is the target volume"
|
400
|
+
# run on all child dirs, not this dir
|
333
401
|
Dir.entries(dir_to_analyze).reject {|d| d.start_with?('.')}.each do |name|
|
334
402
|
# puts "\tentry: >#{file}<"
|
335
403
|
full_path = File.join(dir_to_analyze, name)
|
336
|
-
if (Dir.exist?(full_path))
|
404
|
+
if (Dir.exist?(full_path) && !File.symlink?(full_path))
|
337
405
|
# puts "Contender: >#{full_path}<"
|
338
|
-
analyze_dirs(full_path)
|
406
|
+
analyze_dirs(full_path, self.dir_tree)
|
339
407
|
end
|
340
408
|
end
|
341
409
|
return
|
342
410
|
end
|
343
411
|
|
344
|
-
|
345
|
-
cmd = "du -
|
412
|
+
# use "P" to help prevent following any symlinks
|
413
|
+
cmd = "du -sxkP \"#{dir_to_analyze}\""
|
346
414
|
puts "\trunning #{cmd}"
|
347
415
|
output = `#{cmd}`.strip().split("\t")
|
348
416
|
# puts "Du output:"
|
@@ -350,69 +418,108 @@ class StorageVisualizer
|
|
350
418
|
size = output[0].to_i
|
351
419
|
size_gb = "#{'%.0f' % (size.to_f / 1024 / 1024)}"
|
352
420
|
# puts "Size: #{size}\nCapacity: #{self.diskhash['/']['capacity']}"
|
353
|
-
|
354
|
-
|
421
|
+
|
422
|
+
# Occupancy as a fraction of total space
|
423
|
+
# occupancy = (size.to_f / self.capacity.to_f)
|
424
|
+
|
425
|
+
# Occupancy as a fraction of USED space
|
426
|
+
occupancy = (size.to_f / self.used.to_f)
|
427
|
+
|
355
428
|
occupancy_pct = "#{'%.0f' % (occupancy * 100)}"
|
356
|
-
|
357
429
|
capacity_gb = "#{'%.0f' % (self.capacity.to_f / 1024 / 1024)}"
|
358
430
|
|
359
431
|
# if this dir contains more than 5% of used disk space, add it to the tree
|
432
|
+
|
433
|
+
if (dir_to_analyze == self.target_dir)
|
434
|
+
# puts "Dir to analyze is the target dir, space used outside this dir.."
|
435
|
+
# account for space used outside of target dir
|
436
|
+
other_space = self.used - size
|
437
|
+
other_space_gb = "#{'%.0f' % (other_space / 1024 / 1024)}"
|
438
|
+
parent.children.push(DirNode.new(parent, self.target_volume, self.target_volume, other_space_gb))
|
439
|
+
end
|
360
440
|
|
361
441
|
|
362
442
|
if (occupancy > self.threshold_pct)
|
363
|
-
puts "Dir contains more than 5% of disk space: #{dir_to_analyze} \n\tsize:\t#{size_gb} / \ncapacity:\t#{capacity_gb} = #{occupancy_pct}%"
|
364
|
-
#
|
365
|
-
|
366
|
-
if (dir_to_analyze == self.target_dir)
|
367
|
-
|
368
|
-
other_space = self.used - size
|
369
|
-
other_space_gb = "#{'%.0f' % (other_space / 1024 / 1024)}"
|
370
|
-
other_space_array = ['/', 'Other', other_space_gb]
|
371
|
-
|
372
|
-
short_target_dir = self.target_dir.split('/').reverse[0]
|
373
|
-
short_target_dir = (short_target_dir == nil) ? self.target_dir : short_target_dir
|
374
|
-
|
375
|
-
comparison = ['/', short_target_dir, size_gb]
|
376
|
-
|
377
|
-
# add them to the array
|
378
|
-
self.tree.push(other_space_array)
|
379
|
-
self.tree.push(comparison)
|
443
|
+
# puts "Dir contains more than 5% of disk space: #{dir_to_analyze} \n\tsize:\t#{size_gb} / \ncapacity:\t#{capacity_gb} = #{occupancy_pct}%"
|
444
|
+
puts "Dir contains more than 5% of used disk space: #{dir_to_analyze} \n\tsize:\t\t#{size_gb} / \n\toccupancy:\t#{self.used_gb} = #{occupancy_pct}% of used space"
|
380
445
|
|
446
|
+
# puts "Dir to analyze (#{dir_to_analyze}) is not the target dir (#{self.target_dir})"
|
447
|
+
dirs = dir_to_analyze.split('/')
|
448
|
+
|
449
|
+
short_dir = dirs.pop().gsub("'","\\\\'")
|
450
|
+
full_parent = dirs.join('/')
|
451
|
+
if (dir_to_analyze == self.target_dir || full_parent == self.target_volume)
|
452
|
+
# puts "Either this dir is the target dir, or the parent is the target volume, make parent the full target volume"
|
453
|
+
short_parent = self.target_volume.gsub("'","\\\\'")
|
381
454
|
else
|
382
|
-
#
|
383
|
-
short_parent =
|
384
|
-
|
385
|
-
# short_parent = (short_parent == nil) ? parent : short_parent
|
386
|
-
# case for when parent is '/'
|
387
|
-
short_parent = (short_parent == '') ? '/' : short_parent
|
388
|
-
|
389
|
-
short_dir = dir_to_analyze.split('/').reverse[0]
|
390
|
-
|
391
|
-
# array_to_push = [parent, dir_to_analyze, size_gb]
|
392
|
-
array_to_push = [short_parent, short_dir, size_gb]
|
393
|
-
self.tree.push(array_to_push)
|
455
|
+
# puts "Neither this dir or parent is the target dir, making parent short"
|
456
|
+
short_parent = dirs.pop().gsub("'","\\\\'")
|
394
457
|
end
|
458
|
+
|
459
|
+
|
460
|
+
this_node = DirNode.new(parent, dir_to_analyze, short_dir, size_gb)
|
461
|
+
parent.children.push(this_node)
|
395
462
|
|
396
463
|
# run on all child dirs
|
397
464
|
Dir.entries(dir_to_analyze).reject {|d| d.start_with?('.')}.each do |name|
|
398
|
-
# puts "\tentry: >#{file}<"
|
399
|
-
|
400
465
|
full_path = File.join(dir_to_analyze, name)
|
401
|
-
|
402
|
-
if (Dir.exist?(full_path))
|
466
|
+
# don't follow any symlinks
|
467
|
+
if (Dir.exist?(full_path) && !File.symlink?(full_path))
|
403
468
|
# puts "Contender: >#{full_path}<"
|
404
|
-
analyze_dirs(full_path)
|
469
|
+
analyze_dirs(full_path, this_node)
|
405
470
|
end
|
406
471
|
end
|
407
472
|
|
408
|
-
end
|
473
|
+
end # occupancy > threshold
|
474
|
+
|
475
|
+
end # function
|
476
|
+
|
477
|
+
|
478
|
+
|
479
|
+
def traverse_tree_and_remove_duplicates
|
480
|
+
puts "\nHandling duplicate entries.."
|
481
|
+
nodes = []
|
482
|
+
nodes.push(self.dir_tree)
|
483
|
+
comparison_list = []
|
484
|
+
while true
|
485
|
+
if (nodes.length == 0)
|
486
|
+
break
|
487
|
+
end
|
409
488
|
|
489
|
+
node = nodes.shift
|
490
|
+
comparison_list.push(node)
|
491
|
+
# pp node
|
492
|
+
if node.parent == nil
|
493
|
+
# puts "\tparent: no parent \n\tdir: #{node.dir_name} \n\tshort: #{node.dir_short} \n\tsize: #{node.size_gb}"
|
494
|
+
else
|
495
|
+
# puts "\tparent: #{node.parent.dir_short.to_s} \n\tdir: #{node.dir_name} \n\tshort: #{node.dir_short} \n\tsize: #{node.size_gb}"
|
496
|
+
end
|
497
|
+
nodes.concat(node.children)
|
498
|
+
end
|
499
|
+
# puts "Done building node list"
|
500
|
+
|
501
|
+
|
502
|
+
|
503
|
+
for i in 0..comparison_list.length do
|
504
|
+
for j in 0..comparison_list.length do
|
505
|
+
if (comparison_list[i] != nil && comparison_list[j] != nil)
|
506
|
+
if (i != j && comparison_list[i].dir_short == comparison_list[j].dir_short)
|
507
|
+
puts "\t#{comparison_list[i].dir_short} is the same as #{comparison_list[j].dir_short}, changing to #{comparison_list[j].dir_short}*"
|
508
|
+
comparison_list[j].dir_short = "#{comparison_list[j].dir_short}*"
|
509
|
+
end
|
510
|
+
end
|
511
|
+
end
|
512
|
+
end
|
513
|
+
puts "Duplicate handling complete"
|
514
|
+
|
410
515
|
end
|
411
516
|
|
517
|
+
|
412
518
|
|
413
519
|
def run
|
414
520
|
self.get_basic_disk_info
|
415
|
-
self.analyze_dirs(self.target_dir)
|
521
|
+
self.analyze_dirs(self.target_dir, self.dir_tree)
|
522
|
+
self.traverse_tree_and_remove_duplicates
|
416
523
|
self.format_data_for_the_chart
|
417
524
|
self.write_storage_report
|
418
525
|
|
@@ -438,12 +545,12 @@ def run
|
|
438
545
|
vs = StorageVisualizer.new()
|
439
546
|
end
|
440
547
|
|
441
|
-
puts "\nRunning visualization"
|
548
|
+
# puts "\nRunning visualization"
|
442
549
|
vs.run()
|
443
550
|
|
444
551
|
# puts "dumping tree: "
|
445
552
|
# pp vs.tree
|
446
|
-
puts "Formatted tree\n#{vs.tree_formatted}"
|
553
|
+
# puts "Formatted tree\n#{vs.tree_formatted}"
|
447
554
|
|
448
555
|
end
|
449
556
|
|
metadata
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: storage_visualizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Terry Case
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This tool helps visualize which directories are occupying the most storage.
|
14
|
-
Any directory that occupies more than 5% of disk space is added to a visual
|
15
|
-
storage report in the form of a Google Sankey diagram. The storage
|
16
|
-
using the linux `du` utility. It has been tested on Mac OSX
|
17
|
-
systems, will not work on Windows. Run as sudo if analyzing
|
18
|
-
directories. May take a while to run.
|
14
|
+
Any directory that occupies more than 5% of used disk space is added to a visual
|
15
|
+
hierarchical storage report in the form of a Google Sankey diagram. The storage
|
16
|
+
data is gathered using the linux `du` utility. It has been tested on Mac OSX and
|
17
|
+
linux systems (Ubuntu & CentOS), will not work on Windows. Run as sudo if analyzing
|
18
|
+
otherwise inaccessible directories. May take a while to run.
|
19
19
|
email: terrylcase@gmail.com
|
20
20
|
executables: []
|
21
21
|
extensions: []
|