active_storage_dedup 1.0.0.alpha → 1.0.0.beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +34 -1
- data/CHANGELOG.md +9 -0
- data/Rakefile +1 -1
- data/lib/active_storage_dedup/attachment_options.rb +2 -2
- data/lib/active_storage_dedup/blob_deduplication.rb +4 -4
- data/lib/active_storage_dedup/deduplication_job.rb +9 -9
- data/lib/active_storage_dedup/version.rb +1 -1
- data/lib/tasks/active_storage_dedup.rake +9 -11
- metadata +22 -22
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fcef2e3f799ca998bca328d8324f3e5038adf52508c73b61fa20ce7e6a4a9a30
|
|
4
|
+
data.tar.gz: ce928b0477db7dbd8360b15544e5cb8935ad177340f01685f5d06323599ab820
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3af226e7307b32aead229012fcbf657bc9ddc2ce56167cb30e07100440582a084d9449ed749d9626874eadfdb60987bc89bfacffcb3252b35465249b703bb06f
|
|
7
|
+
data.tar.gz: b55a9314af3dfa0102f14dce01791af79a751f77a2fc2a296a77a9331a70e939196fb60a7a6b0441b10c207db75e4ebe5d3212004f9a1320854acfc95233d404
|
data/.rubocop.yml
CHANGED
|
@@ -1,5 +1,38 @@
|
|
|
1
1
|
AllCops:
|
|
2
|
-
TargetRubyVersion:
|
|
2
|
+
TargetRubyVersion: 2.7
|
|
3
|
+
|
|
4
|
+
Metrics/BlockLength:
|
|
5
|
+
Enabled: false
|
|
6
|
+
|
|
7
|
+
Metrics/MethodLength:
|
|
8
|
+
Enabled: false
|
|
9
|
+
|
|
10
|
+
Metrics/ClassLength:
|
|
11
|
+
Enabled: false
|
|
12
|
+
|
|
13
|
+
Metrics/ModuleLength:
|
|
14
|
+
Enabled: false
|
|
15
|
+
|
|
16
|
+
Metrics/ParameterLists:
|
|
17
|
+
Enabled: false
|
|
18
|
+
|
|
19
|
+
Metrics/AbcSize:
|
|
20
|
+
Enabled: false
|
|
21
|
+
|
|
22
|
+
Metrics/CyclomaticComplexity:
|
|
23
|
+
Enabled: false
|
|
24
|
+
|
|
25
|
+
Metrics/PerceivedComplexity:
|
|
26
|
+
Enabled: false
|
|
27
|
+
|
|
28
|
+
Layout/LineLength:
|
|
29
|
+
Enabled: false
|
|
30
|
+
|
|
31
|
+
Naming/VariableNumber:
|
|
32
|
+
Enabled: false
|
|
33
|
+
|
|
34
|
+
Naming/MethodParameterName:
|
|
35
|
+
Enabled: false
|
|
3
36
|
|
|
4
37
|
Style/StringLiterals:
|
|
5
38
|
EnforcedStyle: double_quotes
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [1.0.0.beta] - 2025-12-01
|
|
4
|
+
|
|
5
|
+
- Enforce minimum support of Rails 7.2 and Ruby 3.1+ (`22910a5`)
|
|
6
|
+
- Limit compatibility to officially supported Rails/Ruby versions (`e8c1391`)
|
|
7
|
+
- Default the test run to specs only for faster feedback (`398b015`)
|
|
8
|
+
- Expand CI matrix to cover all current Rails and Ruby releases (`16d90c9`)
|
|
9
|
+
- Polish specs and linting (`716c0ff`, `7f1c556`)
|
|
10
|
+
- Link the sample test application in docs (`62fcc33`)
|
|
11
|
+
|
|
3
12
|
## [1.0.0.alpha] - 2025-11-23
|
|
4
13
|
|
|
5
14
|
- Initial release
|
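data/Rakefile
CHANGED
|
[The +1 -1 hunk for this file is missing from this extract.]
|
data/lib/active_storage_dedup/attachment_options.rb
CHANGED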
|
@@ -7,7 +7,7 @@ module ActiveStorageDedup
|
|
|
7
7
|
ActiveStorageDedup.register_attachment(self.name, name, deduplicate: deduplicate)
|
|
8
8
|
|
|
9
9
|
super(name, dependent: dependent, service: service,
|
|
10
|
-
|
|
10
|
+
strict_loading: strict_loading, **options)
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
def has_many_attached(name, dependent: :purge_later, service: nil,
|
|
@@ -15,7 +15,7 @@ module ActiveStorageDedup
|
|
|
15
15
|
ActiveStorageDedup.register_attachment(self.name, name, deduplicate: deduplicate)
|
|
16
16
|
|
|
17
17
|
super(name, dependent: dependent, service: service,
|
|
18
|
-
|
|
18
|
+
strict_loading: strict_loading, **options)
|
|
19
19
|
end
|
|
20
20
|
end
|
|
21
21
|
end
|
|
data/lib/active_storage_dedup/blob_deduplication.rb
CHANGED
|
@@ -30,7 +30,7 @@ module ActiveStorageDedup
|
|
|
30
30
|
actual_service_name = blob.service_name || service.name
|
|
31
31
|
Rails.logger.debug "[ActiveStorageDedup] Checking for duplicates: checksum=#{blob.checksum[0..12]}..., service=#{actual_service_name}"
|
|
32
32
|
|
|
33
|
-
if existing_blob = find_by(checksum: blob.checksum, service_name: actual_service_name)
|
|
33
|
+
if (existing_blob = find_by(checksum: blob.checksum, service_name: actual_service_name))
|
|
34
34
|
Rails.logger.info "[ActiveStorageDedup] ✓ Reusing existing blob #{existing_blob.id} (checksum: #{blob.checksum[0..12]}..., service: #{actual_service_name})"
|
|
35
35
|
return existing_blob
|
|
36
36
|
end
|
|
@@ -43,7 +43,7 @@ module ActiveStorageDedup
|
|
|
43
43
|
end
|
|
44
44
|
|
|
45
45
|
# HOOK 2: Direct uploads to cloud storage
|
|
46
|
-
def create_before_direct_upload!(
|
|
46
|
+
def create_before_direct_upload!(filename:, byte_size:, checksum:, key: nil,
|
|
47
47
|
content_type: nil, metadata: nil,
|
|
48
48
|
service_name: nil,
|
|
49
49
|
__dedup_record: nil, __dedup_attachment_name: nil, **options)
|
|
@@ -69,7 +69,7 @@ module ActiveStorageDedup
|
|
|
69
69
|
Rails.logger.debug "[ActiveStorageDedup] Checking for duplicates: checksum=#{checksum[0..12]}..., service=#{actual_service_name}"
|
|
70
70
|
|
|
71
71
|
# Check for existing blob
|
|
72
|
-
if existing_blob = find_by(checksum: checksum, service_name: actual_service_name)
|
|
72
|
+
if (existing_blob = find_by(checksum: checksum, service_name: actual_service_name))
|
|
73
73
|
Rails.logger.info "[ActiveStorageDedup] ✓ Reusing existing blob #{existing_blob.id} for direct upload (checksum: #{checksum[0..12]}..., service: #{actual_service_name})"
|
|
74
74
|
return existing_blob
|
|
75
75
|
end
|
|
@@ -86,7 +86,7 @@ module ActiveStorageDedup
|
|
|
86
86
|
end
|
|
87
87
|
|
|
88
88
|
# HOOK 3: Fallback for programmatic attach (record.file.attach(io: ...))
|
|
89
|
-
def create_after_unfurling!(key: nil,
|
|
89
|
+
def create_after_unfurling!(io:, filename:, key: nil, content_type: nil,
|
|
90
90
|
metadata: nil, service_name: nil, identify: true,
|
|
91
91
|
__dedup_record: nil, __dedup_attachment_name: nil, **options)
|
|
92
92
|
Rails.logger.debug "[ActiveStorageDedup] create_after_unfurling! called for #{filename}"
|
|
data/lib/active_storage_dedup/deduplication_job.rb
CHANGED
|
@@ -23,10 +23,10 @@ module ActiveStorageDedup
|
|
|
23
23
|
|
|
24
24
|
# Find all checksum+service combinations that have duplicates
|
|
25
25
|
duplicate_groups = ActiveStorage::Blob
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
.select(:checksum, :service_name)
|
|
27
|
+
.group(:checksum, :service_name)
|
|
28
|
+
.having("COUNT(*) > 1")
|
|
29
|
+
.count
|
|
30
30
|
|
|
31
31
|
if duplicate_groups.empty?
|
|
32
32
|
Rails.logger.info "[ActiveStorageDedup] ✓ No duplicate blobs found - database is clean!"
|
|
@@ -36,7 +36,7 @@ module ActiveStorageDedup
|
|
|
36
36
|
Rails.logger.info "[ActiveStorageDedup] Found #{duplicate_groups.size} group(s) with duplicates"
|
|
37
37
|
|
|
38
38
|
total_merged = 0
|
|
39
|
-
duplicate_groups.
|
|
39
|
+
duplicate_groups.each_key do |(checksum, service_name)|
|
|
40
40
|
merged = process_duplicate_group(checksum, service_name)
|
|
41
41
|
total_merged += merged
|
|
42
42
|
end
|
|
@@ -51,9 +51,9 @@ module ActiveStorageDedup
|
|
|
51
51
|
|
|
52
52
|
# Find all blobs with same checksum and service
|
|
53
53
|
duplicate_blobs = ActiveStorage::Blob
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
54
|
+
.where(checksum: checksum, service_name: service_name)
|
|
55
|
+
.order(:created_at)
|
|
56
|
+
.to_a
|
|
57
57
|
|
|
58
58
|
Rails.logger.debug "[ActiveStorageDedup] Found #{duplicate_blobs.size} blob(s) with checksum #{checksum[0..12]}..."
|
|
59
59
|
|
|
@@ -91,7 +91,7 @@ module ActiveStorageDedup
|
|
|
91
91
|
Rails.logger.debug "[ActiveStorageDedup] Deleted duplicate blob #{duplicate.id} record"
|
|
92
92
|
|
|
93
93
|
Rails.logger.info "[ActiveStorageDedup] ✓ Merged blob #{duplicate.id} (#{attachment_count} attachment(s)) into #{keeper.id}"
|
|
94
|
-
rescue => e
|
|
94
|
+
rescue StandardError => e
|
|
95
95
|
Rails.logger.error "[ActiveStorageDedup] ✗ Error merging blob #{duplicate.id}: #{e.class.name} - #{e.message}"
|
|
96
96
|
Rails.logger.debug "[ActiveStorageDedup] Error backtrace: #{e.backtrace.first(5).join("\n")}"
|
|
97
97
|
# Don't raise - allow job to complete for other duplicates
|
|
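data/lib/active_storage_dedup/version.rb
CHANGED
|
[The +1 -1 hunk for this file is missing from this extract.]
|
data/lib/tasks/active_storage_dedup.rake
CHANGED
|
@@ -7,10 +7,10 @@ namespace :active_storage_dedup do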
|
|
|
7
7
|
|
|
8
8
|
# Group blobs by checksum and service_name, find groups with duplicates
|
|
9
9
|
duplicate_groups = ActiveStorage::Blob
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
.select("checksum, service_name, COUNT(*) as blob_count")
|
|
11
|
+
.group(:checksum, :service_name)
|
|
12
|
+
.having("COUNT(*) > 1")
|
|
13
|
+
.order("blob_count DESC")
|
|
14
14
|
|
|
15
15
|
if duplicate_groups.empty?
|
|
16
16
|
puts "No duplicate blobs found!"
|
|
@@ -22,8 +22,8 @@ namespace :active_storage_dedup do
|
|
|
22
22
|
|
|
23
23
|
duplicate_groups.each do |group|
|
|
24
24
|
blobs = ActiveStorage::Blob
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
.where(checksum: group.checksum, service_name: group.service_name)
|
|
26
|
+
.order(:created_at)
|
|
27
27
|
|
|
28
28
|
keeper = blobs.first
|
|
29
29
|
duplicates = blobs[1..]
|
|
@@ -37,7 +37,7 @@ namespace :active_storage_dedup do
|
|
|
37
37
|
puts "Filename: #{keeper.filename}"
|
|
38
38
|
puts "Total blobs: #{blobs.count}"
|
|
39
39
|
puts "Keeper blob ID: #{keeper.id} (#{keeper.attachments.count} attachments)"
|
|
40
|
-
puts "Duplicate blob IDs: #{duplicates.map(&:id).join(
|
|
40
|
+
puts "Duplicate blob IDs: #{duplicates.map(&:id).join(", ")}"
|
|
41
41
|
puts "Total attachments across duplicates: #{duplicates.sum { |b| b.attachments.count }}"
|
|
42
42
|
puts "Wasted storage: #{format_bytes(wasted_bytes)}"
|
|
43
43
|
puts "-" * 80
|
|
@@ -78,9 +78,7 @@ namespace :active_storage_dedup do
|
|
|
78
78
|
updated += 1
|
|
79
79
|
end
|
|
80
80
|
|
|
81
|
-
if (index + 1) % 100
|
|
82
|
-
puts "Processed #{index + 1}/#{total_blobs} blobs..."
|
|
83
|
-
end
|
|
81
|
+
puts "Processed #{index + 1}/#{total_blobs} blobs..." if ((index + 1) % 100).zero?
|
|
84
82
|
end
|
|
85
83
|
|
|
86
84
|
puts "\nBackfill complete!"
|
|
@@ -92,7 +90,7 @@ namespace :active_storage_dedup do
|
|
|
92
90
|
def format_bytes(bytes)
|
|
93
91
|
return "0 B" if bytes.zero?
|
|
94
92
|
|
|
95
|
-
units = [
|
|
93
|
+
units = %w[B KB MB GB TB]
|
|
96
94
|
exp = (Math.log(bytes) / Math.log(1024)).floor
|
|
97
95
|
exp = [exp, units.length - 1].min
|
|
98
96
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: active_storage_dedup
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.0.alpha
|
|
4
|
+
version: 1.0.0.beta
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- coderhs
|
|
@@ -10,63 +10,63 @@ cert_chain: []
|
|
|
10
10
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
|
-
name:
|
|
13
|
+
name: activestorage
|
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
|
15
15
|
requirements:
|
|
16
16
|
- - ">="
|
|
17
17
|
- !ruby/object:Gem::Version
|
|
18
|
-
version:
|
|
18
|
+
version: 7.2.0
|
|
19
19
|
type: :runtime
|
|
20
20
|
prerelease: false
|
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
22
22
|
requirements:
|
|
23
23
|
- - ">="
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
|
-
version:
|
|
25
|
+
version: 7.2.0
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
27
|
-
name:
|
|
27
|
+
name: rails
|
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
|
29
29
|
requirements:
|
|
30
30
|
- - ">="
|
|
31
31
|
- !ruby/object:Gem::Version
|
|
32
|
-
version:
|
|
32
|
+
version: 7.2.0
|
|
33
33
|
type: :runtime
|
|
34
34
|
prerelease: false
|
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
36
|
requirements:
|
|
37
37
|
- - ">="
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
|
-
version:
|
|
39
|
+
version: 7.2.0
|
|
40
40
|
- !ruby/object:Gem::Dependency
|
|
41
|
-
name:
|
|
41
|
+
name: combustion
|
|
42
42
|
requirement: !ruby/object:Gem::Requirement
|
|
43
43
|
requirements:
|
|
44
44
|
- - "~>"
|
|
45
45
|
- !ruby/object:Gem::Version
|
|
46
|
-
version: '3
|
|
46
|
+
version: '1.3'
|
|
47
47
|
type: :development
|
|
48
48
|
prerelease: false
|
|
49
49
|
version_requirements: !ruby/object:Gem::Requirement
|
|
50
50
|
requirements:
|
|
51
51
|
- - "~>"
|
|
52
52
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: '3
|
|
53
|
+
version: '1.3'
|
|
54
54
|
- !ruby/object:Gem::Dependency
|
|
55
|
-
name: rspec
|
|
55
|
+
name: rspec
|
|
56
56
|
requirement: !ruby/object:Gem::Requirement
|
|
57
57
|
requirements:
|
|
58
|
-
- - "
|
|
58
|
+
- - "~>"
|
|
59
59
|
- !ruby/object:Gem::Version
|
|
60
|
-
version: '0'
|
|
60
|
+
version: '3.0'
|
|
61
61
|
type: :development
|
|
62
62
|
prerelease: false
|
|
63
63
|
version_requirements: !ruby/object:Gem::Requirement
|
|
64
64
|
requirements:
|
|
65
|
-
- - "
|
|
65
|
+
- - "~>"
|
|
66
66
|
- !ruby/object:Gem::Version
|
|
67
|
-
version: '0'
|
|
67
|
+
version: '3.0'
|
|
68
68
|
- !ruby/object:Gem::Dependency
|
|
69
|
-
name:
|
|
69
|
+
name: rspec-rails
|
|
70
70
|
requirement: !ruby/object:Gem::Requirement
|
|
71
71
|
requirements:
|
|
72
72
|
- - ">="
|
|
@@ -94,19 +94,19 @@ dependencies:
|
|
|
94
94
|
- !ruby/object:Gem::Version
|
|
95
95
|
version: '0'
|
|
96
96
|
- !ruby/object:Gem::Dependency
|
|
97
|
-
name:
|
|
97
|
+
name: sqlite3
|
|
98
98
|
requirement: !ruby/object:Gem::Requirement
|
|
99
99
|
requirements:
|
|
100
|
-
- - "
|
|
100
|
+
- - ">="
|
|
101
101
|
- !ruby/object:Gem::Version
|
|
102
|
-
version: '
|
|
102
|
+
version: '0'
|
|
103
103
|
type: :development
|
|
104
104
|
prerelease: false
|
|
105
105
|
version_requirements: !ruby/object:Gem::Requirement
|
|
106
106
|
requirements:
|
|
107
|
-
- - "
|
|
107
|
+
- - ">="
|
|
108
108
|
- !ruby/object:Gem::Version
|
|
109
|
-
version: '
|
|
109
|
+
version: '0'
|
|
110
110
|
description: Prevents duplicate file uploads in Active Storage by reusing existing
|
|
111
111
|
blobs with matching checksums and service names
|
|
112
112
|
email:
|
|
@@ -151,7 +151,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
151
151
|
requirements:
|
|
152
152
|
- - ">="
|
|
153
153
|
- !ruby/object:Gem::Version
|
|
154
|
-
version:
|
|
154
|
+
version: 3.1.0
|
|
155
155
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
156
156
|
requirements:
|
|
157
157
|
- - ">="
|