storage-guardian 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +35 -0
- data/lib/storage_guardian/bloat_detector.rb +47 -0
- data/lib/storage_guardian/budget.rb +22 -0
- data/lib/storage_guardian/cold_detector.rb +25 -0
- data/lib/storage_guardian/duplicate_detector.rb +39 -0
- data/lib/storage_guardian/report.rb +66 -0
- data/lib/storage_guardian/scanner.rb +36 -0
- data/lib/storage_guardian.rb +24 -0
- metadata +53 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: d29b5d8b123281c76397d7cad89c3c4cca61ba4e1538c708a6cf7d495b589315
|
|
4
|
+
data.tar.gz: 7b2fafef38ba2b9821ae407cba74546c11c75861caf0be67edeae2ec3d3ae0a2
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 84d013eed13183f67f266f4b1014064a0bd7d93c437c2c15054ba1a8ff67f94792374fd5cbc1f0f607f7fd7fccaf1b65081fff6f31c7ab46582549bcc6410a7d
|
|
7
|
+
data.tar.gz: e9c9e5ff18fe07b0fed185476fc735816166685e98cf25c2207f50a4df424da14b86290ff4abc3ed2d6e3414df0ccbd33a2b959e8084376fa826528d61953b69
|
data/README.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Storage Guardian
|
|
2
|
+
|
|
3
|
+
Storage conservation guardian — duplicate detection, bloat detection, cold file detection, and budget tracking.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
gem install storage-guardian
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```ruby
|
|
14
|
+
require "storage_guardian"
|
|
15
|
+
|
|
16
|
+
# Quick scan
|
|
17
|
+
report = StorageGuardian.scan("/path/to/project")
|
|
18
|
+
puts report
|
|
19
|
+
|
|
20
|
+
# With custom budget
|
|
21
|
+
budget = StorageGuardian::Budget.new(max_file_size_mb: 50, max_dir_size_mb: 500)
|
|
22
|
+
report = StorageGuardian.scan("/path/to/project", budget: budget, cold_threshold_days: 60)
|
|
23
|
+
puts report.summary
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
|
|
28
|
+
- **Duplicate Detection** — SHA256 hash-based, groups identical files
|
|
29
|
+
- **Bloat Detection** — oversized files and directories vs budget
|
|
30
|
+
- **Cold File Detection** — files not modified in N days
|
|
31
|
+
- **Budget Tracking** — configurable per-file and per-directory size limits (defaults: 100 MB per file, 1024 MB per directory)
|
|
32
|
+
|
|
33
|
+
## License
|
|
34
|
+
|
|
35
|
+
MIT
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module StorageGuardian
|
|
4
|
+
# Flags individual files and directories whose size exceeds the budget.
#
# +entries+ are objects responding to +path+ and +size+ (size in bytes);
# +budget+ must respond to +limit_for(key)+ and return limits in megabytes.
class BloatDetector
  # @param entries [Array<#path, #size>] scanned files
  # @param budget [#limit_for] source of :max_file_size_mb / :max_dir_size_mb
  def initialize(entries, budget)
    @entries = entries
    @budget = budget
  end

  # Builds the list of findings: oversized files first (in entry order),
  # then bloated directories (in order of first appearance).
  #
  # @return [Array<Hash>] each finding has :type, :severity, :path,
  #   :size_mb, :limit_mb and :message keys
  def detect
    oversized_files + bloated_directories
  end

  private

  # Findings for every entry strictly larger than :max_file_size_mb.
  def oversized_files
    limit_mb = @budget.limit_for(:max_file_size_mb)
    max_bytes = limit_mb * 1024 * 1024

    @entries.select { |entry| entry.size > max_bytes }.map do |entry|
      finding(:oversized_file, :high, "File", entry.path, entry.size, limit_mb)
    end
  end

  # Findings for every directory whose files sum to more than :max_dir_size_mb.
  # Only files directly inside a directory count towards it (the key is
  # File.dirname of each entry); sizes of nested subdirectories are not rolled up.
  def bloated_directories
    limit_mb = @budget.limit_for(:max_dir_size_mb)
    max_bytes = limit_mb * 1024 * 1024

    totals = @entries.each_with_object(Hash.new(0)) do |entry, sizes|
      sizes[File.dirname(entry.path)] += entry.size
    end

    totals.select { |_dir, bytes| bytes > max_bytes }.map do |dir, bytes|
      finding(:bloated_directory, :medium, "Directory", dir, bytes, limit_mb)
    end
  end

  # Assembles one finding hash; the MB figure is computed once and reused
  # for both the :size_mb field and the human-readable message.
  def finding(type, severity, label, path, bytes, limit_mb)
    size_mb = (bytes / 1024.0 / 1024.0).round(2)
    {
      type: type,
      severity: severity,
      path: path,
      size_mb: size_mb,
      limit_mb: limit_mb,
      message: "#{label} #{path} is #{size_mb}MB (limit: #{limit_mb}MB)"
    }
  end
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module StorageGuardian
|
|
4
|
+
# Holds the storage limits used by the detectors, layered over DEFAULTS.
class Budget
  # Fallback limits used for any key the caller does not override.
  # NOTE(review): only the two *_size_mb keys are read by the detectors
  # visible in this gem; confirm whether the other two are enforced anywhere.
  DEFAULTS = {
    max_file_size_mb: 100,
    max_dir_size_mb: 1024,
    max_file_count: 10_000,
    max_duplicate_ratio: 0.1
  }.freeze

  attr_reader :limits

  # @param limits [Hash] overrides; keys may be strings or symbols
  def initialize(limits = {})
    overrides = limits.transform_keys(&:to_sym)
    @limits = DEFAULTS.merge(overrides)
  end

  # @param key [String, Symbol] limit name
  # @return [Object, nil] the configured value, or nil for an unknown key
  def limit_for(key)
    name = key.to_sym
    @limits[name]
  end
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module StorageGuardian
|
|
4
|
+
# Reports files whose modification time is older than a threshold.
#
# +entries+ are objects responding to +path+, +size+ (bytes) and +mtime+ (Time).
class ColdDetector
  SECONDS_PER_DAY = 86_400

  # @param entries [Array<#path, #size, #mtime>] scanned files
  # @param threshold_days [Numeric] minimum age, in days, for a file to be cold
  # @param now [Time] reference time for both the cutoff and the reported
  #   ages. Defaults to the time of construction; pass an explicit value to
  #   get reproducible results.
  def initialize(entries, threshold_days, now: Time.now)
    @entries = entries
    @threshold_days = threshold_days
    @now = now
    @cutoff = now - (threshold_days * SECONDS_PER_DAY)
  end

  # @return [Array<Hash>] one finding per cold file with :type, :severity,
  #   :path, :size_mb, :last_modified, :days_old and :message keys
  def detect
    @entries.select { |entry| entry.mtime < @cutoff }.map { |entry| finding_for(entry) }
  end

  private

  # Builds a finding. Age and size are computed once against the single
  # reference time, so the :days_old field and the message always agree.
  def finding_for(entry)
    days_old = ((@now - entry.mtime) / SECONDS_PER_DAY).round
    size_mb = (entry.size / 1024.0 / 1024.0).round(2)
    {
      type: :cold_file,
      severity: :low,
      path: entry.path,
      size_mb: size_mb,
      last_modified: entry.mtime,
      days_old: days_old,
      message: "File #{entry.path} not modified for #{days_old} days (#{size_mb}MB)"
    }
  end
end
|
|
25
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
|
|
5
|
+
module StorageGuardian
|
|
6
|
+
# Finds groups of files with identical content.
#
# Entries are first bucketed by byte size, so only files that could be
# identical are read and hashed with SHA256.
class DuplicateDetector
  # @param entries [Array<#path, #size>] scanned files
  def initialize(entries)
    @entries = entries
  end

  # Returns groups of entries that share the same content hash.
  # Zero-byte files are ignored, and files that cannot be read are skipped
  # rather than compared.
  #
  # @return [Array<Array>] each inner array holds two or more entries
  def detect
    same_size_groups.flat_map { |candidates| identical_groups(candidates) }
  end

  private

  # Non-empty entries grouped by size, keeping only sizes shared by 2+ files.
  def same_size_groups
    @entries.reject { |entry| entry.size == 0 }
            .group_by(&:size)
            .values
            .select { |group| group.size > 1 }
  end

  # Splits same-size candidates by content digest and keeps real duplicates.
  def identical_groups(candidates)
    by_digest = Hash.new { |groups, digest| groups[digest] = [] }
    candidates.each do |entry|
      digest = compute_hash(entry.path)
      # A nil digest means the file was unreadable: it can neither match nor
      # be matched, so it never joins a group.
      by_digest[digest] << entry if digest
    end
    by_digest.values.select { |group| group.size > 1 }
  end

  # SHA256 hex digest of the file content, or nil when the file cannot be
  # read (missing, permission denied, is a directory, I/O error). Returning
  # nil keeps path-derived values out of the content-digest namespace.
  def compute_hash(path)
    Digest::SHA256.file(path).hexdigest
  rescue SystemCallError, IOError
    nil
  end
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module StorageGuardian
|
|
4
|
+
# Aggregates scan results and renders them as a summary hash or as text.
class Report
  attr_reader :root, :entries, :duplicates, :bloat, :cold

  # @param root [String] scanned root path
  # @param entries [Array<#size>] all scanned files
  # @param duplicates [Array<Array>] groups of identical entries
  # @param bloat [Array<Hash>] findings with :severity and :message
  # @param cold [Array<Hash>] findings with :message
  def initialize(root, entries, duplicates, bloat, cold)
    @root = root
    @entries = entries
    @duplicates = duplicates
    @bloat = bloat
    @cold = cold
  end

  # @return [Integer] combined size of all entries, in bytes
  def total_size
    @entries.map(&:size).sum
  end

  # Bytes taken by redundant copies: each group's total minus its first copy.
  def duplicate_size
    @duplicates.map { |copies| copies.map(&:size).sum - copies.first.size }.sum
  end

  # @return [Hash] headline counts and sizes (sizes in MB, two decimals)
  def summary
    {
      total_files: @entries.size,
      total_size_mb: megabytes(total_size),
      duplicate_groups: @duplicates.size,
      duplicate_size_mb: megabytes(duplicate_size),
      bloat_findings: @bloat.size,
      cold_files: @cold.size
    }
  end

  # Renders the report: a header, then one section per non-empty finding list.
  def to_s
    stats = summary
    output = [
      "=== Storage Guardian Report ===",
      "Root: #{@root}",
      "Files: #{stats[:total_files]} | Size: #{stats[:total_size_mb]}MB",
      "Duplicates: #{stats[:duplicate_groups]} groups (#{stats[:duplicate_size_mb]}MB wasted)",
      "Bloat findings: #{stats[:bloat_findings]}",
      "Cold files: #{stats[:cold_files]}",
      ""
    ]
    output.concat(duplicate_lines, bloat_lines, cold_lines)
    output.join("\n")
  end

  private

  # Converts a byte count to megabytes rounded to two decimals.
  def megabytes(bytes)
    (bytes / 1024.0 / 1024.0).round(2)
  end

  # Section listing every duplicate group with the paths of its copies.
  def duplicate_lines
    return [] unless @duplicates.any?

    listing = @duplicates.map do |copies|
      " #{copies.size} copies: #{copies.map(&:path).join(', ')}"
    end
    ["--- Duplicates ---", *listing]
  end

  # Section listing every bloat finding, prefixed with its severity.
  def bloat_lines
    return [] unless @bloat.any?

    ["--- Bloat ---", *@bloat.map { |item| " [#{item[:severity].upcase}] #{item[:message]}" }]
  end

  # Section showing the first five cold files and a count of the remainder.
  def cold_lines
    return [] unless @cold.any?

    section = ["--- Cold Files (#{@cold.size}) ---"]
    @cold.first(5).each { |item| section << " #{item[:message]}" }
    section << " ... and #{@cold.size - 5} more" if @cold.size > 5
    section
  end
end
|
|
66
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
|
|
5
|
+
module StorageGuardian
|
|
6
|
+
# Value object for one scanned file: its path, size in bytes, modification
# time, and a content-hash slot.
# NOTE(review): the :hash member defines an accessor named `hash`, which
# presumably shadows Object#hash; if so, entries cannot be used as Hash keys
# or with hash-based Array operations such as uniq — confirm. Scanner always
# sets it to nil and nothing in the visible source assigns it, so renaming it
# (e.g. to :digest) looks safe but would change the constructor keywords.
Entry = Struct.new(:path, :size, :mtime, :hash, keyword_init: true)
|
|
7
|
+
|
|
8
|
+
# Walks a directory tree and records every file found as an Entry.
class Scanner
  # @param root_path [String] directory to scan
  def initialize(root_path)
    @root = root_path
  end

  # Recursively lists files under the root, including dot-files.
  # Directories and symlinks are skipped; files that vanish or are
  # unreadable while scanning are silently omitted.
  #
  # @return [Array<Entry>] empty when the root is not a directory
  def scan
    return [] unless File.directory?(@root)

    # The root is passed as +base:+ rather than spliced into the pattern, so
    # glob metacharacters in the root path ([ ] { } * ?) are taken literally.
    Dir.glob("**/*", File::FNM_DOTMATCH, base: @root).filter_map do |relative|
      entry_for(File.join(@root, relative))
    end
  end

  private

  # Builds the Entry for +path+, or returns nil when it should be skipped.
  def entry_for(path)
    return if File.directory?(path) || File.symlink?(path)

    stat = File.stat(path)
    Entry.new(
      path: path,
      size: stat.size,
      mtime: stat.mtime,
      hash: nil # placeholder; nothing in the visible source fills this in
    )
  rescue Errno::ENOENT, Errno::EACCES
    nil
  end
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "storage_guardian/budget"
|
|
4
|
+
require_relative "storage_guardian/scanner"
|
|
5
|
+
require_relative "storage_guardian/duplicate_detector"
|
|
6
|
+
require_relative "storage_guardian/bloat_detector"
|
|
7
|
+
require_relative "storage_guardian/cold_detector"
|
|
8
|
+
require_relative "storage_guardian/report"
|
|
9
|
+
|
|
10
|
+
# Entry point for the gem: scans a directory and runs every detector.
module StorageGuardian
  # Base error class for the gem.
  class Error < StandardError; end

  # Scans +path+ and returns a Report combining duplicate, bloat and cold
  # findings.
  #
  # @param path [String] directory to scan
  # @param budget [Budget, nil] limits for bloat detection; a default Budget
  #   is created when none is given
  # @param cold_threshold_days [Numeric] age passed to ColdDetector
  # @return [Report]
  def self.scan(path, budget: nil, cold_threshold_days: 90)
    limits = budget || Budget.new
    files = Scanner.new(path).scan

    Report.new(
      path,
      files,
      DuplicateDetector.new(files).detect,
      BloatDetector.new(files, limits).detect,
      ColdDetector.new(files, cold_threshold_days).detect
    )
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: storage-guardian
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- SuperInstance
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-06-02 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description: Detect duplicate files, bloated directories, cold storage candidates,
|
|
14
|
+
and track storage budgets for Ruby projects.
|
|
15
|
+
email:
|
|
16
|
+
- team@superinstance.com
|
|
17
|
+
executables: []
|
|
18
|
+
extensions: []
|
|
19
|
+
extra_rdoc_files: []
|
|
20
|
+
files:
|
|
21
|
+
- README.md
|
|
22
|
+
- lib/storage_guardian.rb
|
|
23
|
+
- lib/storage_guardian/bloat_detector.rb
|
|
24
|
+
- lib/storage_guardian/budget.rb
|
|
25
|
+
- lib/storage_guardian/cold_detector.rb
|
|
26
|
+
- lib/storage_guardian/duplicate_detector.rb
|
|
27
|
+
- lib/storage_guardian/report.rb
|
|
28
|
+
- lib/storage_guardian/scanner.rb
|
|
29
|
+
homepage: https://github.com/SuperInstance/gem-storage-guardian
|
|
30
|
+
licenses:
|
|
31
|
+
- MIT
|
|
32
|
+
metadata: {}
|
|
33
|
+
post_install_message:
|
|
34
|
+
rdoc_options: []
|
|
35
|
+
require_paths:
|
|
36
|
+
- lib
|
|
37
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
38
|
+
requirements:
|
|
39
|
+
- - ">="
|
|
40
|
+
- !ruby/object:Gem::Version
|
|
41
|
+
version: '2.7'
|
|
42
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '0'
|
|
47
|
+
requirements: []
|
|
48
|
+
rubygems_version: 3.3.5
|
|
49
|
+
signing_key:
|
|
50
|
+
specification_version: 4
|
|
51
|
+
summary: Storage conservation guardian — budget tracking, duplicate detection, bloat
|
|
52
|
+
detection
|
|
53
|
+
test_files: []
|