storage-guardian 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d29b5d8b123281c76397d7cad89c3c4cca61ba4e1538c708a6cf7d495b589315
4
+ data.tar.gz: 7b2fafef38ba2b9821ae407cba74546c11c75861caf0be67edeae2ec3d3ae0a2
5
+ SHA512:
6
+ metadata.gz: 84d013eed13183f67f266f4b1014064a0bd7d93c437c2c15054ba1a8ff67f94792374fd5cbc1f0f607f7fd7fccaf1b65081fff6f31c7ab46582549bcc6410a7d
7
+ data.tar.gz: e9c9e5ff18fe07b0fed185476fc735816166685e98cf25c2207f50a4df424da14b86290ff4abc3ed2d6e3414df0ccbd33a2b959e8084376fa826528d61953b69
data/README.md ADDED
@@ -0,0 +1,35 @@
# Storage Guardian

Storage conservation guardian — duplicate detection, bloat detection, cold file detection, and budget tracking.

## Installation

```bash
gem install storage-guardian
```

## Usage

```ruby
require "storage_guardian"

# Quick scan
report = StorageGuardian.scan("/path/to/project")
puts report

# With custom budget
budget = StorageGuardian::Budget.new(max_file_size_mb: 50, max_dir_size_mb: 500)
report = StorageGuardian.scan("/path/to/project", budget: budget, cold_threshold_days: 60)
puts report.summary
```

## Features

- **Duplicate Detection** — SHA256 hash-based, groups identical files
- **Bloat Detection** — oversized files and directories vs budget
- **Cold File Detection** — files not modified in N days
- **Budget Tracking** — configurable limits

## License

MIT
@@ -0,0 +1,47 @@
# frozen_string_literal: true

module StorageGuardian
  # Compares scanned entries against a budget and reports every file and
  # directory whose size exceeds the configured limit.
  class BloatDetector
    # Budget limits are expressed in MB; entry sizes are compared in bytes.
    BYTES_PER_MB = 1024 * 1024

    # @param entries [Enumerable] objects responding to +path+ and +size+
    # @param budget [#limit_for] returns the limit in MB for
    #   +:max_file_size_mb+ and +:max_dir_size_mb+; a +nil+ limit disables
    #   the corresponding check
    def initialize(entries, budget)
      @entries = entries
      @budget = budget
    end

    # Runs both checks.
    #
    # @return [Array<Hash>] oversized-file findings followed by
    #   bloated-directory findings; each has the keys +:type+, +:severity+,
    #   +:path+, +:size_mb+, +:limit_mb+ and +:message+
    def detect
      oversized_files + bloated_directories
    end

    private

    # Files strictly larger than the per-file limit.
    def oversized_files
      limit_mb = @budget.limit_for(:max_file_size_mb)
      return [] if limit_mb.nil? # no limit configured: nothing can exceed it

      max_bytes = limit_mb * BYTES_PER_MB
      @entries.select { |entry| entry.size > max_bytes }.map do |entry|
        finding(:oversized_file, :high, "File", entry.path, entry.size, limit_mb)
      end
    end

    # Directories whose directly contained files sum to more than the
    # per-directory limit. Sizes are attributed to the immediate parent
    # directory only; they are not rolled up into ancestor directories.
    def bloated_directories
      limit_mb = @budget.limit_for(:max_dir_size_mb)
      return [] if limit_mb.nil?

      max_bytes = limit_mb * BYTES_PER_MB
      dir_sizes = @entries.each_with_object(Hash.new(0)) do |entry, totals|
        totals[File.dirname(entry.path)] += entry.size
      end

      dir_sizes.select { |_dir, bytes| bytes > max_bytes }.map do |dir, bytes|
        finding(:bloated_directory, :medium, "Directory", dir, bytes, limit_mb)
      end
    end

    # Builds one finding hash; the size is converted to MB once and reused
    # for both the +:size_mb+ field and the message.
    def finding(type, severity, label, path, bytes, limit_mb)
      size_mb = (bytes / 1024.0 / 1024.0).round(2)
      {
        type: type,
        severity: severity,
        path: path,
        size_mb: size_mb,
        limit_mb: limit_mb,
        message: "#{label} #{path} is #{size_mb}MB (limit: #{limit_mb}MB)"
      }
    end
  end
end
@@ -0,0 +1,22 @@
# frozen_string_literal: true

module StorageGuardian
  # Holds the storage limits used by the detectors. Any limit not supplied
  # by the caller falls back to the value in DEFAULTS.
  class Budget
    DEFAULTS = {
      max_file_size_mb: 100,
      max_dir_size_mb: 1024,
      max_file_count: 10_000,
      max_duplicate_ratio: 0.1
    }.freeze

    # @return [Hash{Symbol => Object}] the effective limits (defaults merged
    #   with caller overrides)
    attr_reader :limits

    # @param limits [Hash, nil] overrides keyed by Symbol or String; keys are
    #   normalised to symbols. +nil+ is treated as "no overrides".
    def initialize(limits = {})
      overrides = (limits || {}).transform_keys(&:to_sym)
      @limits = DEFAULTS.merge(overrides)
    end

    # Looks up a single limit.
    #
    # @param key [Symbol, String] limit name
    # @return [Object, nil] the configured value, or +nil+ for an unknown key
    def limit_for(key)
      @limits[key.to_sym]
    end
  end
end
@@ -0,0 +1,25 @@
# frozen_string_literal: true

module StorageGuardian
  # Reports files whose modification time is older than a threshold.
  class ColdDetector
    SECONDS_PER_DAY = 86_400

    # @param entries [Enumerable] objects responding to +path+, +size+ and
    #   +mtime+ (a Time)
    # @param threshold_days [Numeric] age in days beyond which a file is cold
    # @param now [Time] reference time for both the cutoff and the reported
    #   age, so the two are always consistent with each other
    def initialize(entries, threshold_days, now: Time.now)
      @entries = entries
      @threshold_days = threshold_days
      @now = now
      @cutoff = now - (threshold_days * SECONDS_PER_DAY)
    end

    # @return [Array<Hash>] one finding per entry modified strictly before
    #   the cutoff, with the keys +:type+, +:severity+, +:path+, +:size_mb+,
    #   +:last_modified+, +:days_old+ and +:message+
    def detect
      @entries.select { |entry| entry.mtime < @cutoff }.map { |entry| finding_for(entry) }
    end

    private

    # Builds the finding hash; size and age are computed once and reused in
    # the message.
    def finding_for(entry)
      size_mb = (entry.size / 1024.0 / 1024.0).round(2)
      days_old = ((@now - entry.mtime) / SECONDS_PER_DAY).round
      {
        type: :cold_file,
        severity: :low,
        path: entry.path,
        size_mb: size_mb,
        last_modified: entry.mtime,
        days_old: days_old,
        message: "File #{entry.path} not modified for #{days_old} days (#{size_mb}MB)"
      }
    end
  end
end
@@ -0,0 +1,39 @@
# frozen_string_literal: true

require "digest"

module StorageGuardian
  # Finds files with identical content. Entries are first bucketed by size,
  # so only files that share a size are ever read and hashed.
  class DuplicateDetector
    # @param entries [Enumerable] objects responding to +path+ and +size+
    def initialize(entries)
      @entries = entries
    end

    # Returns groups of entries that share the same content hash.
    #
    # Zero-byte files are ignored. Files that cannot be read are skipped,
    # so they are never reported as duplicates of anything.
    #
    # @return [Array<Array>] each inner array holds two or more entries
    def detect
      @entries
        .reject { |entry| entry.size == 0 } # rubocop:disable Style/NumericPredicate
        .group_by(&:size)
        .each_value
        .select { |same_size| same_size.size > 1 }
        .flat_map { |same_size| groups_by_content(same_size) }
    end

    private

    # Splits same-sized entries into groups with identical SHA256 digests,
    # keeping only groups that contain at least two entries.
    def groups_by_content(candidates)
      by_digest = Hash.new { |groups, digest| groups[digest] = [] }
      candidates.each do |entry|
        digest = compute_hash(entry.path)
        by_digest[digest] << entry if digest
      end
      by_digest.values.select { |group| group.size > 1 }
    end

    # @return [String, nil] hex SHA256 of the file content, or +nil+ when the
    #   file cannot be read (missing, permission denied, not a regular file, ...)
    def compute_hash(path)
      Digest::SHA256.file(path).hexdigest
    rescue SystemCallError, IOError
      nil
    end
  end
end
@@ -0,0 +1,66 @@
# frozen_string_literal: true

module StorageGuardian
  # Bundles the results of a scan and renders them either as a summary hash
  # or as a plain-text report.
  class Report
    # Number of cold-file findings listed individually by #to_s; the rest
    # are collapsed into a single "... and N more" line.
    COLD_PREVIEW_LIMIT = 5

    attr_reader :root, :entries, :duplicates, :bloat, :cold

    # @param root [String] the path that was scanned
    # @param entries [Array] scanned entries responding to +path+ and +size+
    # @param duplicates [Array<Array>] groups of entries with identical content
    # @param bloat [Array<Hash>] findings with +:severity+ and +:message+
    # @param cold [Array<Hash>] findings with +:message+
    def initialize(root, entries, duplicates, bloat, cold)
      @root = root
      @entries = entries
      @duplicates = duplicates
      @bloat = bloat
      @cold = cold
    end

    # @return [Integer] combined size of all entries
    def total_size
      @entries.sum(&:size)
    end

    # Size that would be reclaimed by keeping one copy per duplicate group:
    # every member except the first is counted. An empty group contributes 0.
    #
    # @return [Integer]
    def duplicate_size
      @duplicates.sum { |group| group.drop(1).sum(&:size) }
    end

    # @return [Hash] counts and sizes (in MB, rounded to two decimals)
    def summary
      {
        total_files: @entries.size,
        total_size_mb: megabytes(total_size),
        duplicate_groups: @duplicates.size,
        duplicate_size_mb: megabytes(duplicate_size),
        bloat_findings: @bloat.size,
        cold_files: @cold.size
      }
    end

    # @return [String] multi-line report: header, then one section per
    #   non-empty result category
    def to_s
      (header_lines + duplicate_lines + bloat_lines + cold_lines).join("\n")
    end

    private

    def megabytes(bytes)
      (bytes / 1024.0 / 1024.0).round(2)
    end

    # Title, totals and a trailing blank line separating them from the sections.
    def header_lines
      stats = summary
      [
        "=== Storage Guardian Report ===",
        "Root: #{@root}",
        "Files: #{stats[:total_files]} | Size: #{stats[:total_size_mb]}MB",
        "Duplicates: #{stats[:duplicate_groups]} groups (#{stats[:duplicate_size_mb]}MB wasted)",
        "Bloat findings: #{stats[:bloat_findings]}",
        "Cold files: #{stats[:cold_files]}",
        ""
      ]
    end

    def duplicate_lines
      return [] unless @duplicates.any?

      ["--- Duplicates ---"] +
        @duplicates.map { |group| " #{group.size} copies: #{group.map(&:path).join(', ')}" }
    end

    def bloat_lines
      return [] unless @bloat.any?

      ["--- Bloat ---"] +
        @bloat.map { |finding| " [#{finding[:severity].upcase}] #{finding[:message]}" }
    end

    # Lists at most COLD_PREVIEW_LIMIT findings and summarises the remainder.
    def cold_lines
      return [] unless @cold.any?

      lines = ["--- Cold Files (#{@cold.size}) ---"]
      lines.concat(@cold.first(COLD_PREVIEW_LIMIT).map { |finding| " #{finding[:message]}" })
      remaining = @cold.size - COLD_PREVIEW_LIMIT
      lines << " ... and #{remaining} more" if remaining.positive?
      lines
    end
  end
end
@@ -0,0 +1,36 @@
# frozen_string_literal: true

require "digest"

module StorageGuardian
  # One scanned file: its path, size and modification time.
  #
  # The +hash+ member is always set to +nil+ by Scanner, and the
  # DuplicateDetector shipped alongside it computes digests on its own
  # without storing them here.
  # NOTE(review): a Struct member named +hash+ replaces Struct#hash with a
  # plain accessor, so entries are presumably unsafe to use as Hash keys or
  # Set members while it is +nil+ — confirm before relying on that.
  Entry = Struct.new(:path, :size, :mtime, :hash, keyword_init: true)

  # Walks a directory tree and collects an Entry for every regular file.
  class Scanner
    # @param root_path [String] directory to scan
    def initialize(root_path)
      @root = root_path
    end

    # Recursively lists regular files under the root, including dotfiles.
    # Directories, symlinks and special files are skipped, as are paths that
    # cannot be stat'ed (e.g. removed mid-scan or permission denied).
    #
    # @return [Array<Entry>] empty when the root is not a directory
    def scan
      return [] unless File.directory?(@root)

      # The root is passed via +base:+ rather than interpolated into the
      # pattern, so glob metacharacters in the root path are taken literally.
      Dir.glob("**/*", File::FNM_DOTMATCH, base: @root).filter_map do |relative|
        entry_for(File.join(@root, relative))
      end
    end

    private

    # @return [Entry, nil] +nil+ when +path+ is not a regular file or cannot
    #   be inspected
    def entry_for(path)
      # lstat does not follow symlinks, so a link is never a "regular file".
      stat = File.lstat(path)
      return unless stat.file?

      Entry.new(path: path, size: stat.size, mtime: stat.mtime, hash: nil)
    rescue SystemCallError
      nil
    end
  end
end
@@ -0,0 +1,24 @@
# frozen_string_literal: true

require_relative "storage_guardian/budget"
require_relative "storage_guardian/scanner"
require_relative "storage_guardian/duplicate_detector"
require_relative "storage_guardian/bloat_detector"
require_relative "storage_guardian/cold_detector"
require_relative "storage_guardian/report"

# Namespace and single entry point of the library.
module StorageGuardian
  # Base class for errors raised by this library.
  class Error < StandardError; end

  # Scans +path+ and runs the duplicate, bloat and cold-file detectors on
  # the result.
  #
  # @param path [String] directory to scan
  # @param budget [Budget, Hash, nil] a Budget (or any object responding to
  #   +limit_for+), a plain Hash of limit overrides, or +nil+ for the defaults
  # @param cold_threshold_days [Numeric] age in days beyond which a file is
  #   reported as cold
  # @return [Report]
  def self.scan(path, budget: nil, cold_threshold_days: 90)
    # Anything that is not already budget-like is treated as overrides.
    budget = Budget.new(budget || {}) unless budget.respond_to?(:limit_for)
    entries = Scanner.new(path).scan

    Report.new(
      path,
      entries,
      DuplicateDetector.new(entries).detect,
      BloatDetector.new(entries, budget).detect,
      ColdDetector.new(entries, cold_threshold_days).detect
    )
  end
end
metadata ADDED
@@ -0,0 +1,53 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: storage-guardian
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - SuperInstance
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-06-02 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Detect duplicate files, bloated directories, cold storage candidates,
14
+ and track storage budgets for Ruby projects.
15
+ email:
16
+ - team@superinstance.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - README.md
22
+ - lib/storage_guardian.rb
23
+ - lib/storage_guardian/bloat_detector.rb
24
+ - lib/storage_guardian/budget.rb
25
+ - lib/storage_guardian/cold_detector.rb
26
+ - lib/storage_guardian/duplicate_detector.rb
27
+ - lib/storage_guardian/report.rb
28
+ - lib/storage_guardian/scanner.rb
29
+ homepage: https://github.com/SuperInstance/gem-storage-guardian
30
+ licenses:
31
+ - MIT
32
+ metadata: {}
33
+ post_install_message:
34
+ rdoc_options: []
35
+ require_paths:
36
+ - lib
37
+ required_ruby_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '2.7'
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ requirements: []
48
+ rubygems_version: 3.3.5
49
+ signing_key:
50
+ specification_version: 4
51
+ summary: Storage conservation guardian — budget tracking, duplicate detection, bloat
52
+ detection
53
+ test_files: []