storage_visualizer 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/storage_visualizer.rb +183 -76
  3. metadata +7 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 88c10ac5e6c03634ed7e26436ab61cefd18de7ec
4
- data.tar.gz: da9c280b9296d6886df43cc010a3cf0942317824
3
+ metadata.gz: dafd6c489f0809c633d222e2f2e46a1f5f1f087c
4
+ data.tar.gz: 6c6b72560460adb5b4c13398fa26adc9b209e85b
5
5
  SHA512:
6
- metadata.gz: 4aa75292140cbf9594d6a14181608d69795440c1fbecb83b98daba489028946eba958b989b3f3caaf4eba4f26f4c452f7e0dada095e022719cfd6133c580ac7a
7
- data.tar.gz: 96b1a48b87221021a8746a70d2cbee8375660ad39c0050bf5ea13097c5f54de739716082d98175080a4d7f566928f9ee51c7d3e912a9d47a07e6e99e1fa81c80
6
+ metadata.gz: 496b3d800a10b5331980c9acd22e2e416f27ac628f2b260a17b723edde95845df949829f10fe99b8daac803a323a134ad86729e09ff2815f16e6a0f8819e5662
7
+ data.tar.gz: 0fc8ac60e6df0ecd7a5566fbda1e136e6e497a77843a55b26e7997a9e81f13768d47d3dd96ac8eb819d37d73db54fb4cc2ad7576c4f5b11f8157a97765edf722
@@ -20,6 +20,30 @@
20
20
  require 'pp'
21
21
  require 'yaml'
22
22
  require 'date'
23
+ require 'uri'
24
+ require 'cgi'
25
+ require 'json'
26
+
27
+
28
+ class DirNode
29
+ attr_accessor :parent
30
+ attr_accessor :dir_name
31
+ attr_accessor :dir_short
32
+ attr_accessor :size_gb
33
+ attr_accessor :children
34
+
35
+ def initialize(parent_in, dir_name_in, dir_short_in, size_gb_in)
36
+ self.parent = parent_in
37
+ self.dir_name = dir_name_in
38
+ self.dir_short = dir_short_in
39
+ self.size_gb = size_gb_in
40
+ self.children = []
41
+ end
42
+
43
+ end
44
+
45
+
46
+
23
47
 
24
48
  class StorageVisualizer
25
49
 
@@ -85,16 +109,15 @@ class StorageVisualizer
85
109
 
86
110
 
87
111
  # To do:
88
- # x Make it work on linux
112
+ # x Make it work on mac & linux (CentOS & Ubuntu)
89
113
  # x Specify blocksize and do not assume 512 bytes (use the -k flag, which reports blocks as KB)
90
- # - Enable for filesystems not mounted at the root '/'
114
+ # x Enable for filesystems not mounted at the root '/'
91
115
  # - Allow the threshold to be specified (default is 5%)
92
116
  # - Allow output filename to be specified
93
117
  # Maybe:
94
- # - Prevent paths on the graph from crossing
118
+ # x Prevent paths on the graph from crossing (dirs with the same name become the same node)
95
119
  # - See if it would be cleaner to use the googlecharts gem (gem install googlecharts)
96
- # - Create an installer that sets up cron scheduling and add polling to the webpage
97
- # - What to do about directories with the same name under different parents
120
+
98
121
 
99
122
  # disk Bytes
100
123
  attr_accessor :capacity
@@ -110,13 +133,18 @@ class StorageVisualizer
110
133
  attr_accessor :tree_formatted
111
134
  attr_accessor :diskhash
112
135
  attr_accessor :threshold_pct
136
+ attr_accessor :target_volume
137
+ attr_accessor :dupe_counter
138
+
139
+ # this is the root DirNode object
140
+ attr_accessor :dir_tree
113
141
 
114
142
  # Constructor
115
143
  def initialize(target_dir_passed = nil)
116
144
 
117
145
  if (target_dir_passed != nil)
118
146
  expanded = File.expand_path(target_dir_passed)
119
- puts "Target dir: #{expanded}"
147
+ # puts "Target dir: #{expanded}"
120
148
  if (Dir.exist?(expanded))
121
149
  self.target_dir = expanded
122
150
  else
@@ -127,25 +155,44 @@ class StorageVisualizer
127
155
  self.target_dir = File.expand_path('~')
128
156
  end
129
157
 
130
-
158
+ # how much space is considered worthy of noting on the chart
131
159
  self.threshold_pct = 0.05
132
160
  self.diskhash = {}
133
161
  self.tree = []
134
162
  self.tree_formatted = ''
163
+ self.dupe_counter = 0
135
164
  end
136
165
 
137
166
 
138
167
 
139
168
  def format_data_for_the_chart
140
- working_string = "[\n"
169
+
170
+ # Build the list of nodes
171
+ nodes = []
172
+ nodes.push(self.dir_tree)
173
+ comparison_list = []
174
+ while true
175
+ if (nodes.length == 0)
176
+ break
177
+ end
178
+ node = nodes.shift
179
+ comparison_list.push(node)
180
+ nodes.concat(node.children)
181
+ end
141
182
 
142
- self.tree.each_with_index do |entry, index|
143
- if(index == self.tree.length - 1)
183
+
184
+ # format the data for the chart
185
+ working_string = "[\n"
186
+ comparison_list.each_with_index do |entry, index|
187
+ if (entry.parent == nil)
188
+ next
189
+ end
190
+ if(index == comparison_list.length - 1)
144
191
  # this is the next to last element, it gets no comma
145
- working_string << "[ '#{entry[0]}', '#{entry[1]}', #{entry[2]} ]\n"
192
+ working_string << "[ '#{entry.parent.dir_short}', '#{entry.dir_short}', #{entry.size_gb} ]\n"
146
193
  else
147
194
  # mind the comma
148
- working_string << "[ '#{entry[0]}', '#{entry[1]}', #{entry[2]} ],\n"
195
+ working_string << "[ '#{entry.parent.dir_short}', '#{entry.dir_short}', #{entry.size_gb} ],\n"
149
196
  end
150
197
  end
151
198
  working_string << "]\n"
@@ -154,6 +201,7 @@ class StorageVisualizer
154
201
  end
155
202
 
156
203
 
204
+
157
205
  def write_storage_report
158
206
 
159
207
  the_html = %q|<html>
@@ -204,7 +252,7 @@ class StorageVisualizer
204
252
  var data = new google.visualization.DataTable();
205
253
  data.addColumn('string', 'From');
206
254
  data.addColumn('string', 'To');
207
- data.addColumn('number', 'Weight');
255
+ data.addColumn('number', 'Size (GB)');
208
256
  data.addRows( | + self.tree_formatted + %q|);
209
257
 
210
258
  // Set chart options
@@ -213,11 +261,12 @@ class StorageVisualizer
213
261
  width: 1000,
214
262
  sankey: {
215
263
  iterations: 32,
216
- node: { label: { fontName: 'Arial',
217
- fontSize: 10,
218
- color: '#871b47',
219
- bold: false,
220
- italic: true } } },
264
+ node: { label: {
265
+ fontName: 'Arial',
266
+ fontSize: 10,
267
+ color: '#871b47',
268
+ bold: false,
269
+ italic: true } } },
221
270
  };
222
271
 
223
272
 
@@ -246,7 +295,7 @@ class StorageVisualizer
246
295
  # df -l gets info about locally-mounted filesystems
247
296
  output = `df -lk`
248
297
 
249
- # OSX
298
+ # OSX:
250
299
  # Filesystem 1024-blocks Used Available Capacity iused ifree %iused Mounted on
251
300
  # /dev/disk1 975912960 349150592 626506368 36% 87351646 156626592 36% /
252
301
  # localhost:/QwnJE6UBvlR1EvqouX6gMM 975912960 975912960 0 100% 0 0 100% /Volumes/MobileBackups
@@ -275,8 +324,10 @@ class StorageVisualizer
275
324
  # {"capacity"=>498876809216, "used"=>498876809216, "available"=>0}
276
325
  # }
277
326
 
327
+ # get each mount's capacity & utilization
278
328
  output.lines.each_with_index do |line, index|
279
329
  if (index == 0)
330
+ # skip the header line
280
331
  next
281
332
  end
282
333
  cols = line.split
@@ -304,45 +355,62 @@ class StorageVisualizer
304
355
 
305
356
  # puts "Disk mount info:"
306
357
  # pp diskhash
307
- self.capacity = self.diskhash['/']['capacity']
308
- self.used = self.diskhash['/']['used']
309
- self.available = self.diskhash['/']['available']
310
358
 
311
359
 
360
+ # find the (self.)target_volume
361
+ # look through diskhash keys, to find the one that most matches target_dir
362
+ val_of_min = 1000
363
+ # puts "Determining which volume contains the target directory.."
364
+ self.diskhash.keys.each do |volume|
365
+ result = self.target_dir.gsub(volume, '')
366
+ diskhash['match_amt'] = result.length
367
+ # puts "Considering:\t#{volume}, \t closeness: #{result.length}, \t (#{result})"
368
+ if (result.length < val_of_min)
369
+ # puts "Candidate: #{volume}"
370
+ val_of_min = result.length
371
+ self.target_volume = volume
372
+ end
373
+ end
374
+
375
+ puts "Target volume is #{self.target_volume}"
376
+
312
377
 
313
- free_space = (self.available).to_i
314
- free_space_gb = "#{'%.0f' % (free_space / 1024 / 1024)}"
315
- free_space_array = ['/', 'Free Space', free_space_gb]
316
- self.tree.push(free_space_array)
378
+ self.capacity = self.diskhash[self.target_volume]['capacity']
379
+ self.used = self.diskhash[self.target_volume]['used']
380
+ self.available = self.diskhash[self.target_volume]['available']
317
381
 
318
382
  self.capacity_gb = "#{'%.0f' % (self.capacity.to_i / 1024 / 1024)}"
319
383
  self.used_gb = "#{'%.0f' % (self.used.to_i / 1024 / 1024)}"
320
384
  self.available_gb = "#{'%.0f' % (self.available.to_i / 1024 / 1024)}"
321
385
 
322
-
386
+ self.dir_tree = DirNode.new(nil, self.target_volume, self.target_volume, self.capacity)
387
+ self.dir_tree.children.push(DirNode.new(self.dir_tree, 'Free Space', 'Free Space', self.available_gb))
323
388
 
324
389
  end
325
390
 
326
391
 
327
- def analyze_dirs(dir_to_analyze)
392
+ # Crawl the dirs recursively, beginning with the target dir
393
+ def analyze_dirs(dir_to_analyze, parent)
328
394
 
329
- # bootstrap case
330
- if (dir_to_analyze == '/')
331
395
 
332
- # run on all child dirs
396
+ # bootstrap case
397
+ # don't create an entry for the root because there's nothing to link to yet, scan the subdirs
398
+ if (dir_to_analyze == self.target_volume)
399
+ # puts "Dir to analyze is the target volume"
400
+ # run on all child dirs, not this dir
333
401
  Dir.entries(dir_to_analyze).reject {|d| d.start_with?('.')}.each do |name|
334
402
  # puts "\tentry: >#{file}<"
335
403
  full_path = File.join(dir_to_analyze, name)
336
- if (Dir.exist?(full_path))
404
+ if (Dir.exist?(full_path) && !File.symlink?(full_path))
337
405
  # puts "Contender: >#{full_path}<"
338
- analyze_dirs(full_path)
406
+ analyze_dirs(full_path, self.dir_tree)
339
407
  end
340
408
  end
341
409
  return
342
410
  end
343
411
 
344
-
345
- cmd = "du -sxk \"#{dir_to_analyze}\""
412
+ # use "P" to help prevent following any symlinks
413
+ cmd = "du -sxkP \"#{dir_to_analyze}\""
346
414
  puts "\trunning #{cmd}"
347
415
  output = `#{cmd}`.strip().split("\t")
348
416
  # puts "Du output:"
@@ -350,69 +418,108 @@ class StorageVisualizer
350
418
  size = output[0].to_i
351
419
  size_gb = "#{'%.0f' % (size.to_f / 1024 / 1024)}"
352
420
  # puts "Size: #{size}\nCapacity: #{self.diskhash['/']['capacity']}"
353
-
354
- occupancy = (size.to_f / self.capacity.to_f)
421
+
422
+ # Occupancy as a fraction of total space
423
+ # occupancy = (size.to_f / self.capacity.to_f)
424
+
425
+ # Occupancy as a fraction of USED space
426
+ occupancy = (size.to_f / self.used.to_f)
427
+
355
428
  occupancy_pct = "#{'%.0f' % (occupancy * 100)}"
356
-
357
429
  capacity_gb = "#{'%.0f' % (self.capacity.to_f / 1024 / 1024)}"
358
430
 
359
431
  # if this dir contains more than 5% of disk space, add it to the tree
432
+
433
+ if (dir_to_analyze == self.target_dir)
434
+ # puts "Dir to analyze is the target dir, space used outside this dir.."
435
+ # account for space used outside of target dir
436
+ other_space = self.used - size
437
+ other_space_gb = "#{'%.0f' % (other_space / 1024 / 1024)}"
438
+ parent.children.push(DirNode.new(parent, self.target_volume, self.target_volume, other_space_gb))
439
+ end
360
440
 
361
441
 
362
442
  if (occupancy > self.threshold_pct)
363
- puts "Dir contains more than 5% of disk space: #{dir_to_analyze} \n\tsize:\t#{size_gb} / \ncapacity:\t#{capacity_gb} = #{occupancy_pct}%"
364
- # push this dir's info
365
-
366
- if (dir_to_analyze == self.target_dir)
367
-
368
- other_space = self.used - size
369
- other_space_gb = "#{'%.0f' % (other_space / 1024 / 1024)}"
370
- other_space_array = ['/', 'Other', other_space_gb]
371
-
372
- short_target_dir = self.target_dir.split('/').reverse[0]
373
- short_target_dir = (short_target_dir == nil) ? self.target_dir : short_target_dir
374
-
375
- comparison = ['/', short_target_dir, size_gb]
376
-
377
- # add them to the array
378
- self.tree.push(other_space_array)
379
- self.tree.push(comparison)
443
+ # puts "Dir contains more than 5% of disk space: #{dir_to_analyze} \n\tsize:\t#{size_gb} / \ncapacity:\t#{capacity_gb} = #{occupancy_pct}%"
444
+ puts "Dir contains more than 5% of used disk space: #{dir_to_analyze} \n\tsize:\t\t#{size_gb} / \n\toccupancy:\t#{self.used_gb} = #{occupancy_pct}% of used space"
380
445
 
446
+ # puts "Dir to analyze (#{dir_to_analyze}) is not the target dir (#{self.target_dir})"
447
+ dirs = dir_to_analyze.split('/')
448
+
449
+ short_dir = dirs.pop().gsub("'","\\\\'")
450
+ full_parent = dirs.join('/')
451
+ if (dir_to_analyze == self.target_dir || full_parent == self.target_volume)
452
+ # puts "Either this dir is the target dir, or the parent is the target volume, make parent the full target volume"
453
+ short_parent = self.target_volume.gsub("'","\\\\'")
381
454
  else
382
- # get parent dir and add to the tree
383
- short_parent = dir_to_analyze.split('/').reverse[1]
384
-
385
- # short_parent = (short_parent == nil) ? parent : short_parent
386
- # case for when parent is '/'
387
- short_parent = (short_parent == '') ? '/' : short_parent
388
-
389
- short_dir = dir_to_analyze.split('/').reverse[0]
390
-
391
- # array_to_push = [parent, dir_to_analyze, size_gb]
392
- array_to_push = [short_parent, short_dir, size_gb]
393
- self.tree.push(array_to_push)
455
+ # puts "Neither this dir or parent is the target dir, making parent short"
456
+ short_parent = dirs.pop().gsub("'","\\\\'")
394
457
  end
458
+
459
+
460
+ this_node = DirNode.new(parent, dir_to_analyze, short_dir, size_gb)
461
+ parent.children.push(this_node)
395
462
 
396
463
  # run on all child dirs
397
464
  Dir.entries(dir_to_analyze).reject {|d| d.start_with?('.')}.each do |name|
398
- # puts "\tentry: >#{file}<"
399
-
400
465
  full_path = File.join(dir_to_analyze, name)
401
-
402
- if (Dir.exist?(full_path))
466
+ # don't follow any symlinks
467
+ if (Dir.exist?(full_path) && !File.symlink?(full_path))
403
468
  # puts "Contender: >#{full_path}<"
404
- analyze_dirs(full_path)
469
+ analyze_dirs(full_path, this_node)
405
470
  end
406
471
  end
407
472
 
408
- end
473
+ end # occupancy > threshold
474
+
475
+ end # function
476
+
477
+
478
+
479
+ def traverse_tree_and_remove_duplicates
480
+ puts "\nHandling duplicate entries.."
481
+ nodes = []
482
+ nodes.push(self.dir_tree)
483
+ comparison_list = []
484
+ while true
485
+ if (nodes.length == 0)
486
+ break
487
+ end
409
488
 
489
+ node = nodes.shift
490
+ comparison_list.push(node)
491
+ # pp node
492
+ if node.parent == nil
493
+ # puts "\tparent: no parent \n\tdir: #{node.dir_name} \n\tshort: #{node.dir_short} \n\tsize: #{node.size_gb}"
494
+ else
495
+ # puts "\tparent: #{node.parent.dir_short.to_s} \n\tdir: #{node.dir_name} \n\tshort: #{node.dir_short} \n\tsize: #{node.size_gb}"
496
+ end
497
+ nodes.concat(node.children)
498
+ end
499
+ # puts "Done building node list"
500
+
501
+
502
+
503
+ for i in 0..comparison_list.length do
504
+ for j in 0..comparison_list.length do
505
+ if (comparison_list[i] != nil && comparison_list[j] != nil)
506
+ if (i != j && comparison_list[i].dir_short == comparison_list[j].dir_short)
507
+ puts "\t#{comparison_list[i].dir_short} is the same as #{comparison_list[j].dir_short}, changing to #{comparison_list[j].dir_short}*"
508
+ comparison_list[j].dir_short = "#{comparison_list[j].dir_short}*"
509
+ end
510
+ end
511
+ end
512
+ end
513
+ puts "Duplicate handling complete"
514
+
410
515
  end
411
516
 
517
+
412
518
 
413
519
  def run
414
520
  self.get_basic_disk_info
415
- self.analyze_dirs(self.target_dir)
521
+ self.analyze_dirs(self.target_dir, self.dir_tree)
522
+ self.traverse_tree_and_remove_duplicates
416
523
  self.format_data_for_the_chart
417
524
  self.write_storage_report
418
525
 
@@ -438,12 +545,12 @@ def run
438
545
  vs = StorageVisualizer.new()
439
546
  end
440
547
 
441
- puts "\nRunning visualization"
548
+ # puts "\nRunning visualization"
442
549
  vs.run()
443
550
 
444
551
  # puts "dumping tree: "
445
552
  # pp vs.tree
446
- puts "Formatted tree\n#{vs.tree_formatted}"
553
+ # puts "Formatted tree\n#{vs.tree_formatted}"
447
554
 
448
555
  end
449
556
 
metadata CHANGED
@@ -1,21 +1,21 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: storage_visualizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Terry Case
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-07 00:00:00.000000000 Z
11
+ date: 2015-10-05 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This tool helps visualize which directories are occupying the most storage.
14
- Any directory that occupies more than 5% of disk space is added to a visual hierarchichal
15
- storage report in the form of a Google Sankey diagram. The storage data is gathered
16
- using the linux `du` utility. It has been tested on Mac OSX, should work on linux
17
- systems, will not work on Windows. Run as sudo if analyzing otherwise inaccessible
18
- directories. May take a while to run.
14
+ Any directory that occupies more than 5% of used disk space is added to a visual
15
+ hierarchichal storage report in the form of a Google Sankey diagram. The storage
16
+ data is gathered using the linux `du` utility. It has been tested on Mac OSX and
17
+ linux systems (Ubuntu & CentOS), will not work on Windows. Run as sudo if analyzing
18
+ otherwise inaccessible directories. May take a while to run.
19
19
  email: terrylcase@gmail.com
20
20
  executables: []
21
21
  extensions: []