storage_visualizer 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/storage_visualizer.rb +183 -76
  3. metadata +7 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 88c10ac5e6c03634ed7e26436ab61cefd18de7ec
4
- data.tar.gz: da9c280b9296d6886df43cc010a3cf0942317824
3
+ metadata.gz: dafd6c489f0809c633d222e2f2e46a1f5f1f087c
4
+ data.tar.gz: 6c6b72560460adb5b4c13398fa26adc9b209e85b
5
5
  SHA512:
6
- metadata.gz: 4aa75292140cbf9594d6a14181608d69795440c1fbecb83b98daba489028946eba958b989b3f3caaf4eba4f26f4c452f7e0dada095e022719cfd6133c580ac7a
7
- data.tar.gz: 96b1a48b87221021a8746a70d2cbee8375660ad39c0050bf5ea13097c5f54de739716082d98175080a4d7f566928f9ee51c7d3e912a9d47a07e6e99e1fa81c80
6
+ metadata.gz: 496b3d800a10b5331980c9acd22e2e416f27ac628f2b260a17b723edde95845df949829f10fe99b8daac803a323a134ad86729e09ff2815f16e6a0f8819e5662
7
+ data.tar.gz: 0fc8ac60e6df0ecd7a5566fbda1e136e6e497a77843a55b26e7997a9e81f13768d47d3dd96ac8eb819d37d73db54fb4cc2ad7576c4f5b11f8157a97765edf722
@@ -20,6 +20,30 @@
20
20
  require 'pp'
21
21
  require 'yaml'
22
22
  require 'date'
23
+ require 'uri'
24
+ require 'cgi'
25
+ require 'json'
26
+
27
+
28
+ class DirNode
29
+ attr_accessor :parent
30
+ attr_accessor :dir_name
31
+ attr_accessor :dir_short
32
+ attr_accessor :size_gb
33
+ attr_accessor :children
34
+
35
+ def initialize(parent_in, dir_name_in, dir_short_in, size_gb_in)
36
+ self.parent = parent_in
37
+ self.dir_name = dir_name_in
38
+ self.dir_short = dir_short_in
39
+ self.size_gb = size_gb_in
40
+ self.children = []
41
+ end
42
+
43
+ end
44
+
45
+
46
+
23
47
 
24
48
  class StorageVisualizer
25
49
 
@@ -85,16 +109,15 @@ class StorageVisualizer
85
109
 
86
110
 
87
111
  # To do:
88
- # x Make it work on linux
112
+ # x Make it work on mac & linux (CentOS & Ubuntu)
89
113
  # x Specify blocksize and do not assume 512 bytes (use the -k flag, which reports blocks as KB)
90
- # - Enable for filesystems not mounted at the root '/'
114
+ # x Enable for filesystems not mounted at the root '/'
91
115
  # - Allow the threshold to be specified (default is 5%)
92
116
  # - Allow output filename to be specified
93
117
  # Maybe:
94
- # - Prevent paths on the graph from crossing
118
+ # x Prevent paths on the graph from crossing (dirs with the same name become the same node)
95
119
  # - See if it would be cleaner to use the googlecharts gem (gem install googlecharts)
96
- # - Create an installer that sets up cron scheduling and add polling to the webpage
97
- # - What to do about directories with the same name under different parents
120
+
98
121
 
99
122
  # disk Bytes
100
123
  attr_accessor :capacity
@@ -110,13 +133,18 @@ class StorageVisualizer
110
133
  attr_accessor :tree_formatted
111
134
  attr_accessor :diskhash
112
135
  attr_accessor :threshold_pct
136
+ attr_accessor :target_volume
137
+ attr_accessor :dupe_counter
138
+
139
+ # this is the root DirNode object
140
+ attr_accessor :dir_tree
113
141
 
114
142
  # Constructor
115
143
  def initialize(target_dir_passed = nil)
116
144
 
117
145
  if (target_dir_passed != nil)
118
146
  expanded = File.expand_path(target_dir_passed)
119
- puts "Target dir: #{expanded}"
147
+ # puts "Target dir: #{expanded}"
120
148
  if (Dir.exist?(expanded))
121
149
  self.target_dir = expanded
122
150
  else
@@ -127,25 +155,44 @@ class StorageVisualizer
127
155
  self.target_dir = File.expand_path('~')
128
156
  end
129
157
 
130
-
158
+ # how much space is considered worthy of noting on the chart
131
159
  self.threshold_pct = 0.05
132
160
  self.diskhash = {}
133
161
  self.tree = []
134
162
  self.tree_formatted = ''
163
+ self.dupe_counter = 0
135
164
  end
136
165
 
137
166
 
138
167
 
139
168
  def format_data_for_the_chart
140
- working_string = "[\n"
169
+
170
+ # Build the list of nodes
171
+ nodes = []
172
+ nodes.push(self.dir_tree)
173
+ comparison_list = []
174
+ while true
175
+ if (nodes.length == 0)
176
+ break
177
+ end
178
+ node = nodes.shift
179
+ comparison_list.push(node)
180
+ nodes.concat(node.children)
181
+ end
141
182
 
142
- self.tree.each_with_index do |entry, index|
143
- if(index == self.tree.length - 1)
183
+
184
+ # format the data for the chart
185
+ working_string = "[\n"
186
+ comparison_list.each_with_index do |entry, index|
187
+ if (entry.parent == nil)
188
+ next
189
+ end
190
+ if(index == comparison_list.length - 1)
144
191
  # this is the last element, it gets no trailing comma
145
- working_string << "[ '#{entry[0]}', '#{entry[1]}', #{entry[2]} ]\n"
192
+ working_string << "[ '#{entry.parent.dir_short}', '#{entry.dir_short}', #{entry.size_gb} ]\n"
146
193
  else
147
194
  # mind the comma
148
- working_string << "[ '#{entry[0]}', '#{entry[1]}', #{entry[2]} ],\n"
195
+ working_string << "[ '#{entry.parent.dir_short}', '#{entry.dir_short}', #{entry.size_gb} ],\n"
149
196
  end
150
197
  end
151
198
  working_string << "]\n"
@@ -154,6 +201,7 @@ class StorageVisualizer
154
201
  end
155
202
 
156
203
 
204
+
157
205
  def write_storage_report
158
206
 
159
207
  the_html = %q|<html>
@@ -204,7 +252,7 @@ class StorageVisualizer
204
252
  var data = new google.visualization.DataTable();
205
253
  data.addColumn('string', 'From');
206
254
  data.addColumn('string', 'To');
207
- data.addColumn('number', 'Weight');
255
+ data.addColumn('number', 'Size (GB)');
208
256
  data.addRows( | + self.tree_formatted + %q|);
209
257
 
210
258
  // Set chart options
@@ -213,11 +261,12 @@ class StorageVisualizer
213
261
  width: 1000,
214
262
  sankey: {
215
263
  iterations: 32,
216
- node: { label: { fontName: 'Arial',
217
- fontSize: 10,
218
- color: '#871b47',
219
- bold: false,
220
- italic: true } } },
264
+ node: { label: {
265
+ fontName: 'Arial',
266
+ fontSize: 10,
267
+ color: '#871b47',
268
+ bold: false,
269
+ italic: true } } },
221
270
  };
222
271
 
223
272
 
@@ -246,7 +295,7 @@ class StorageVisualizer
246
295
  # df -l gets info about locally-mounted filesystems
247
296
  output = `df -lk`
248
297
 
249
- # OSX
298
+ # OSX:
250
299
  # Filesystem 1024-blocks Used Available Capacity iused ifree %iused Mounted on
251
300
  # /dev/disk1 975912960 349150592 626506368 36% 87351646 156626592 36% /
252
301
  # localhost:/QwnJE6UBvlR1EvqouX6gMM 975912960 975912960 0 100% 0 0 100% /Volumes/MobileBackups
@@ -275,8 +324,10 @@ class StorageVisualizer
275
324
  # {"capacity"=>498876809216, "used"=>498876809216, "available"=>0}
276
325
  # }
277
326
 
327
+ # get each mount's capacity & utilization
278
328
  output.lines.each_with_index do |line, index|
279
329
  if (index == 0)
330
+ # skip the header line
280
331
  next
281
332
  end
282
333
  cols = line.split
@@ -304,45 +355,62 @@ class StorageVisualizer
304
355
 
305
356
  # puts "Disk mount info:"
306
357
  # pp diskhash
307
- self.capacity = self.diskhash['/']['capacity']
308
- self.used = self.diskhash['/']['used']
309
- self.available = self.diskhash['/']['available']
310
358
 
311
359
 
360
+ # find the (self.)target_volume
361
+ # look through diskhash keys, to find the one that most matches target_dir
362
+ val_of_min = 1000
363
+ # puts "Determining which volume contains the target directory.."
364
+ self.diskhash.keys.each do |volume|
365
+ result = self.target_dir.gsub(volume, '')
366
+ diskhash['match_amt'] = result.length
367
+ # puts "Considering:\t#{volume}, \t closeness: #{result.length}, \t (#{result})"
368
+ if (result.length < val_of_min)
369
+ # puts "Candidate: #{volume}"
370
+ val_of_min = result.length
371
+ self.target_volume = volume
372
+ end
373
+ end
374
+
375
+ puts "Target volume is #{self.target_volume}"
376
+
312
377
 
313
- free_space = (self.available).to_i
314
- free_space_gb = "#{'%.0f' % (free_space / 1024 / 1024)}"
315
- free_space_array = ['/', 'Free Space', free_space_gb]
316
- self.tree.push(free_space_array)
378
+ self.capacity = self.diskhash[self.target_volume]['capacity']
379
+ self.used = self.diskhash[self.target_volume]['used']
380
+ self.available = self.diskhash[self.target_volume]['available']
317
381
 
318
382
  self.capacity_gb = "#{'%.0f' % (self.capacity.to_i / 1024 / 1024)}"
319
383
  self.used_gb = "#{'%.0f' % (self.used.to_i / 1024 / 1024)}"
320
384
  self.available_gb = "#{'%.0f' % (self.available.to_i / 1024 / 1024)}"
321
385
 
322
-
386
+ self.dir_tree = DirNode.new(nil, self.target_volume, self.target_volume, self.capacity)
387
+ self.dir_tree.children.push(DirNode.new(self.dir_tree, 'Free Space', 'Free Space', self.available_gb))
323
388
 
324
389
  end
325
390
 
326
391
 
327
- def analyze_dirs(dir_to_analyze)
392
+ # Crawl the dirs recursively, beginning with the target dir
393
+ def analyze_dirs(dir_to_analyze, parent)
328
394
 
329
- # bootstrap case
330
- if (dir_to_analyze == '/')
331
395
 
332
- # run on all child dirs
396
+ # bootstrap case
397
+ # don't create an entry for the root because there's nothing to link to yet, scan the subdirs
398
+ if (dir_to_analyze == self.target_volume)
399
+ # puts "Dir to analyze is the target volume"
400
+ # run on all child dirs, not this dir
333
401
  Dir.entries(dir_to_analyze).reject {|d| d.start_with?('.')}.each do |name|
334
402
  # puts "\tentry: >#{file}<"
335
403
  full_path = File.join(dir_to_analyze, name)
336
- if (Dir.exist?(full_path))
404
+ if (Dir.exist?(full_path) && !File.symlink?(full_path))
337
405
  # puts "Contender: >#{full_path}<"
338
- analyze_dirs(full_path)
406
+ analyze_dirs(full_path, self.dir_tree)
339
407
  end
340
408
  end
341
409
  return
342
410
  end
343
411
 
344
-
345
- cmd = "du -sxk \"#{dir_to_analyze}\""
412
+ # use "P" to help prevent following any symlinks
413
+ cmd = "du -sxkP \"#{dir_to_analyze}\""
346
414
  puts "\trunning #{cmd}"
347
415
  output = `#{cmd}`.strip().split("\t")
348
416
  # puts "Du output:"
@@ -350,69 +418,108 @@ class StorageVisualizer
350
418
  size = output[0].to_i
351
419
  size_gb = "#{'%.0f' % (size.to_f / 1024 / 1024)}"
352
420
  # puts "Size: #{size}\nCapacity: #{self.diskhash['/']['capacity']}"
353
-
354
- occupancy = (size.to_f / self.capacity.to_f)
421
+
422
+ # Occupancy as a fraction of total space
423
+ # occupancy = (size.to_f / self.capacity.to_f)
424
+
425
+ # Occupancy as a fraction of USED space
426
+ occupancy = (size.to_f / self.used.to_f)
427
+
355
428
  occupancy_pct = "#{'%.0f' % (occupancy * 100)}"
356
-
357
429
  capacity_gb = "#{'%.0f' % (self.capacity.to_f / 1024 / 1024)}"
358
430
 
359
431
  # if this dir contains more than the threshold (5% by default) of used disk space, add it to the tree
432
+
433
+ if (dir_to_analyze == self.target_dir)
434
+ # puts "Dir to analyze is the target dir, space used outside this dir.."
435
+ # account for space used outside of target dir
436
+ other_space = self.used - size
437
+ other_space_gb = "#{'%.0f' % (other_space / 1024 / 1024)}"
438
+ parent.children.push(DirNode.new(parent, self.target_volume, self.target_volume, other_space_gb))
439
+ end
360
440
 
361
441
 
362
442
  if (occupancy > self.threshold_pct)
363
- puts "Dir contains more than 5% of disk space: #{dir_to_analyze} \n\tsize:\t#{size_gb} / \ncapacity:\t#{capacity_gb} = #{occupancy_pct}%"
364
- # push this dir's info
365
-
366
- if (dir_to_analyze == self.target_dir)
367
-
368
- other_space = self.used - size
369
- other_space_gb = "#{'%.0f' % (other_space / 1024 / 1024)}"
370
- other_space_array = ['/', 'Other', other_space_gb]
371
-
372
- short_target_dir = self.target_dir.split('/').reverse[0]
373
- short_target_dir = (short_target_dir == nil) ? self.target_dir : short_target_dir
374
-
375
- comparison = ['/', short_target_dir, size_gb]
376
-
377
- # add them to the array
378
- self.tree.push(other_space_array)
379
- self.tree.push(comparison)
443
+ # puts "Dir contains more than 5% of disk space: #{dir_to_analyze} \n\tsize:\t#{size_gb} / \ncapacity:\t#{capacity_gb} = #{occupancy_pct}%"
444
+ puts "Dir contains more than 5% of used disk space: #{dir_to_analyze} \n\tsize:\t\t#{size_gb} / \n\toccupancy:\t#{self.used_gb} = #{occupancy_pct}% of used space"
380
445
 
446
+ # puts "Dir to analyze (#{dir_to_analyze}) is not the target dir (#{self.target_dir})"
447
+ dirs = dir_to_analyze.split('/')
448
+
449
+ short_dir = dirs.pop().gsub("'","\\\\'")
450
+ full_parent = dirs.join('/')
451
+ if (dir_to_analyze == self.target_dir || full_parent == self.target_volume)
452
+ # puts "Either this dir is the target dir, or the parent is the target volume, make parent the full target volume"
453
+ short_parent = self.target_volume.gsub("'","\\\\'")
381
454
  else
382
- # get parent dir and add to the tree
383
- short_parent = dir_to_analyze.split('/').reverse[1]
384
-
385
- # short_parent = (short_parent == nil) ? parent : short_parent
386
- # case for when parent is '/'
387
- short_parent = (short_parent == '') ? '/' : short_parent
388
-
389
- short_dir = dir_to_analyze.split('/').reverse[0]
390
-
391
- # array_to_push = [parent, dir_to_analyze, size_gb]
392
- array_to_push = [short_parent, short_dir, size_gb]
393
- self.tree.push(array_to_push)
455
+ # puts "Neither this dir or parent is the target dir, making parent short"
456
+ short_parent = dirs.pop().gsub("'","\\\\'")
394
457
  end
458
+
459
+
460
+ this_node = DirNode.new(parent, dir_to_analyze, short_dir, size_gb)
461
+ parent.children.push(this_node)
395
462
 
396
463
  # run on all child dirs
397
464
  Dir.entries(dir_to_analyze).reject {|d| d.start_with?('.')}.each do |name|
398
- # puts "\tentry: >#{file}<"
399
-
400
465
  full_path = File.join(dir_to_analyze, name)
401
-
402
- if (Dir.exist?(full_path))
466
+ # don't follow any symlinks
467
+ if (Dir.exist?(full_path) && !File.symlink?(full_path))
403
468
  # puts "Contender: >#{full_path}<"
404
- analyze_dirs(full_path)
469
+ analyze_dirs(full_path, this_node)
405
470
  end
406
471
  end
407
472
 
408
- end
473
+ end # occupancy > threshold
474
+
475
+ end # function
476
+
477
+
478
+
479
+ def traverse_tree_and_remove_duplicates
480
+ puts "\nHandling duplicate entries.."
481
+ nodes = []
482
+ nodes.push(self.dir_tree)
483
+ comparison_list = []
484
+ while true
485
+ if (nodes.length == 0)
486
+ break
487
+ end
409
488
 
489
+ node = nodes.shift
490
+ comparison_list.push(node)
491
+ # pp node
492
+ if node.parent == nil
493
+ # puts "\tparent: no parent \n\tdir: #{node.dir_name} \n\tshort: #{node.dir_short} \n\tsize: #{node.size_gb}"
494
+ else
495
+ # puts "\tparent: #{node.parent.dir_short.to_s} \n\tdir: #{node.dir_name} \n\tshort: #{node.dir_short} \n\tsize: #{node.size_gb}"
496
+ end
497
+ nodes.concat(node.children)
498
+ end
499
+ # puts "Done building node list"
500
+
501
+
502
+
503
+ for i in 0..comparison_list.length do
504
+ for j in 0..comparison_list.length do
505
+ if (comparison_list[i] != nil && comparison_list[j] != nil)
506
+ if (i != j && comparison_list[i].dir_short == comparison_list[j].dir_short)
507
+ puts "\t#{comparison_list[i].dir_short} is the same as #{comparison_list[j].dir_short}, changing to #{comparison_list[j].dir_short}*"
508
+ comparison_list[j].dir_short = "#{comparison_list[j].dir_short}*"
509
+ end
510
+ end
511
+ end
512
+ end
513
+ puts "Duplicate handling complete"
514
+
410
515
  end
411
516
 
517
+
412
518
 
413
519
  def run
414
520
  self.get_basic_disk_info
415
- self.analyze_dirs(self.target_dir)
521
+ self.analyze_dirs(self.target_dir, self.dir_tree)
522
+ self.traverse_tree_and_remove_duplicates
416
523
  self.format_data_for_the_chart
417
524
  self.write_storage_report
418
525
 
@@ -438,12 +545,12 @@ def run
438
545
  vs = StorageVisualizer.new()
439
546
  end
440
547
 
441
- puts "\nRunning visualization"
548
+ # puts "\nRunning visualization"
442
549
  vs.run()
443
550
 
444
551
  # puts "dumping tree: "
445
552
  # pp vs.tree
446
- puts "Formatted tree\n#{vs.tree_formatted}"
553
+ # puts "Formatted tree\n#{vs.tree_formatted}"
447
554
 
448
555
  end
449
556
 
metadata CHANGED
@@ -1,21 +1,21 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: storage_visualizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Terry Case
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-07 00:00:00.000000000 Z
11
+ date: 2015-10-05 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This tool helps visualize which directories are occupying the most storage.
14
- Any directory that occupies more than 5% of disk space is added to a visual hierarchical
15
- storage report in the form of a Google Sankey diagram. The storage data is gathered
16
- using the linux `du` utility. It has been tested on Mac OSX, should work on linux
17
- systems, will not work on Windows. Run as sudo if analyzing otherwise inaccessible
18
- directories. May take a while to run.
14
+ Any directory that occupies more than 5% of used disk space is added to a visual
15
+ hierarchical storage report in the form of a Google Sankey diagram. The storage
16
+ data is gathered using the linux `du` utility. It has been tested on Mac OSX and
17
+ linux systems (Ubuntu & CentOS), will not work on Windows. Run as sudo if analyzing
18
+ otherwise inaccessible directories. May take a while to run.
19
19
  email: terrylcase@gmail.com
20
20
  executables: []
21
21
  extensions: []