active_storage_dedup 1.0.0.alpha → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +34 -1
- data/CHANGELOG.md +9 -0
- data/Rakefile +1 -1
- data/lib/active_storage_dedup/attachment_options.rb +2 -2
- data/lib/active_storage_dedup/blob_deduplication.rb +4 -4
- data/lib/active_storage_dedup/deduplication_job.rb +9 -9
- data/lib/active_storage_dedup/version.rb +1 -1
- data/lib/tasks/active_storage_dedup.rake +9 -11
- metadata +34 -17
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5dfa6bc9e82e985f0b83a8aa8f3a3cde0f7acb32c8b1a16fce73cc27047bb656
|
|
4
|
+
data.tar.gz: 39d1530df0dc5c511c00690336f53f4467eacb2c0025a40ead68d7b944907f0e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 64d835f96491812ddddda9167ed43faf18417babb7b974835018cb9fd599e8d41d601ec08907da079d78c2346f06e388d8179b1180f94c77d72c4ed481fcbee7
|
|
7
|
+
data.tar.gz: '0951425855ea3c076784590b1ed3aeeb6d27590894b494ec8b43a5b2880d144d3b4c741e217c2079d5b4cadeb62319f5f64313e9cbe896b9c00dea91f53d599a'
|
data/.rubocop.yml
CHANGED
|
@@ -1,5 +1,38 @@
|
|
|
1
1
|
AllCops:
|
|
2
|
-
TargetRubyVersion:
|
|
2
|
+
TargetRubyVersion: 2.7
|
|
3
|
+
|
|
4
|
+
Metrics/BlockLength:
|
|
5
|
+
Enabled: false
|
|
6
|
+
|
|
7
|
+
Metrics/MethodLength:
|
|
8
|
+
Enabled: false
|
|
9
|
+
|
|
10
|
+
Metrics/ClassLength:
|
|
11
|
+
Enabled: false
|
|
12
|
+
|
|
13
|
+
Metrics/ModuleLength:
|
|
14
|
+
Enabled: false
|
|
15
|
+
|
|
16
|
+
Metrics/ParameterLists:
|
|
17
|
+
Enabled: false
|
|
18
|
+
|
|
19
|
+
Metrics/AbcSize:
|
|
20
|
+
Enabled: false
|
|
21
|
+
|
|
22
|
+
Metrics/CyclomaticComplexity:
|
|
23
|
+
Enabled: false
|
|
24
|
+
|
|
25
|
+
Metrics/PerceivedComplexity:
|
|
26
|
+
Enabled: false
|
|
27
|
+
|
|
28
|
+
Layout/LineLength:
|
|
29
|
+
Enabled: false
|
|
30
|
+
|
|
31
|
+
Naming/VariableNumber:
|
|
32
|
+
Enabled: false
|
|
33
|
+
|
|
34
|
+
Naming/MethodParameterName:
|
|
35
|
+
Enabled: false
|
|
3
36
|
|
|
4
37
|
Style/StringLiterals:
|
|
5
38
|
EnforcedStyle: double_quotes
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [1.0.0.beta] - 2025-12-01
|
|
4
|
+
|
|
5
|
+
- Enforce minimum support of Rails 7.2 and Ruby 3.1+ (`22910a5`)
|
|
6
|
+
- Limit compatibility to officially supported Rails/Ruby versions (`e8c1391`)
|
|
7
|
+
- Default the test run to specs only for faster feedback (`398b015`)
|
|
8
|
+
- Expand CI matrix to cover all current Rails and Ruby releases (`16d90c9`)
|
|
9
|
+
- Polish specs and linting (`716c0ff`, `7f1c556`)
|
|
10
|
+
- Link the sample test application in docs (`62fcc33`)
|
|
11
|
+
|
|
3
12
|
## [1.0.0.alpha] - 2025-11-23
|
|
4
13
|
|
|
5
14
|
- Initial release
|
data/Rakefile
CHANGED
|
data/lib/active_storage_dedup/attachment_options.rb
CHANGED
|
@@ -7,7 +7,7 @@ module ActiveStorageDedup
|
|
|
7
7
|
ActiveStorageDedup.register_attachment(self.name, name, deduplicate: deduplicate)
|
|
8
8
|
|
|
9
9
|
super(name, dependent: dependent, service: service,
|
|
10
|
-
|
|
10
|
+
strict_loading: strict_loading, **options)
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
def has_many_attached(name, dependent: :purge_later, service: nil,
|
|
@@ -15,7 +15,7 @@ module ActiveStorageDedup
|
|
|
15
15
|
ActiveStorageDedup.register_attachment(self.name, name, deduplicate: deduplicate)
|
|
16
16
|
|
|
17
17
|
super(name, dependent: dependent, service: service,
|
|
18
|
-
|
|
18
|
+
strict_loading: strict_loading, **options)
|
|
19
19
|
end
|
|
20
20
|
end
|
|
21
21
|
end
|
|
data/lib/active_storage_dedup/blob_deduplication.rb
CHANGED
|
@@ -30,7 +30,7 @@ module ActiveStorageDedup
|
|
|
30
30
|
actual_service_name = blob.service_name || service.name
|
|
31
31
|
Rails.logger.debug "[ActiveStorageDedup] Checking for duplicates: checksum=#{blob.checksum[0..12]}..., service=#{actual_service_name}"
|
|
32
32
|
|
|
33
|
-
if existing_blob = find_by(checksum: blob.checksum, service_name: actual_service_name)
|
|
33
|
+
if (existing_blob = find_by(checksum: blob.checksum, service_name: actual_service_name))
|
|
34
34
|
Rails.logger.info "[ActiveStorageDedup] ✓ Reusing existing blob #{existing_blob.id} (checksum: #{blob.checksum[0..12]}..., service: #{actual_service_name})"
|
|
35
35
|
return existing_blob
|
|
36
36
|
end
|
|
@@ -43,7 +43,7 @@ module ActiveStorageDedup
|
|
|
43
43
|
end
|
|
44
44
|
|
|
45
45
|
# HOOK 2: Direct uploads to cloud storage
|
|
46
|
-
def create_before_direct_upload!(
|
|
46
|
+
def create_before_direct_upload!(filename:, byte_size:, checksum:, key: nil,
|
|
47
47
|
content_type: nil, metadata: nil,
|
|
48
48
|
service_name: nil,
|
|
49
49
|
__dedup_record: nil, __dedup_attachment_name: nil, **options)
|
|
@@ -69,7 +69,7 @@ module ActiveStorageDedup
|
|
|
69
69
|
Rails.logger.debug "[ActiveStorageDedup] Checking for duplicates: checksum=#{checksum[0..12]}..., service=#{actual_service_name}"
|
|
70
70
|
|
|
71
71
|
# Check for existing blob
|
|
72
|
-
if existing_blob = find_by(checksum: checksum, service_name: actual_service_name)
|
|
72
|
+
if (existing_blob = find_by(checksum: checksum, service_name: actual_service_name))
|
|
73
73
|
Rails.logger.info "[ActiveStorageDedup] ✓ Reusing existing blob #{existing_blob.id} for direct upload (checksum: #{checksum[0..12]}..., service: #{actual_service_name})"
|
|
74
74
|
return existing_blob
|
|
75
75
|
end
|
|
@@ -86,7 +86,7 @@ module ActiveStorageDedup
|
|
|
86
86
|
end
|
|
87
87
|
|
|
88
88
|
# HOOK 3: Fallback for programmatic attach (record.file.attach(io: ...))
|
|
89
|
-
def create_after_unfurling!(key: nil,
|
|
89
|
+
def create_after_unfurling!(io:, filename:, key: nil, content_type: nil,
|
|
90
90
|
metadata: nil, service_name: nil, identify: true,
|
|
91
91
|
__dedup_record: nil, __dedup_attachment_name: nil, **options)
|
|
92
92
|
Rails.logger.debug "[ActiveStorageDedup] create_after_unfurling! called for #{filename}"
|
|
data/lib/active_storage_dedup/deduplication_job.rb
CHANGED
|
@@ -23,10 +23,10 @@ module ActiveStorageDedup
|
|
|
23
23
|
|
|
24
24
|
# Find all checksum+service combinations that have duplicates
|
|
25
25
|
duplicate_groups = ActiveStorage::Blob
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
.select(:checksum, :service_name)
|
|
27
|
+
.group(:checksum, :service_name)
|
|
28
|
+
.having("COUNT(*) > 1")
|
|
29
|
+
.count
|
|
30
30
|
|
|
31
31
|
if duplicate_groups.empty?
|
|
32
32
|
Rails.logger.info "[ActiveStorageDedup] ✓ No duplicate blobs found - database is clean!"
|
|
@@ -36,7 +36,7 @@ module ActiveStorageDedup
|
|
|
36
36
|
Rails.logger.info "[ActiveStorageDedup] Found #{duplicate_groups.size} group(s) with duplicates"
|
|
37
37
|
|
|
38
38
|
total_merged = 0
|
|
39
|
-
duplicate_groups.
|
|
39
|
+
duplicate_groups.each_key do |(checksum, service_name)|
|
|
40
40
|
merged = process_duplicate_group(checksum, service_name)
|
|
41
41
|
total_merged += merged
|
|
42
42
|
end
|
|
@@ -51,9 +51,9 @@ module ActiveStorageDedup
|
|
|
51
51
|
|
|
52
52
|
# Find all blobs with same checksum and service
|
|
53
53
|
duplicate_blobs = ActiveStorage::Blob
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
54
|
+
.where(checksum: checksum, service_name: service_name)
|
|
55
|
+
.order(:created_at)
|
|
56
|
+
.to_a
|
|
57
57
|
|
|
58
58
|
Rails.logger.debug "[ActiveStorageDedup] Found #{duplicate_blobs.size} blob(s) with checksum #{checksum[0..12]}..."
|
|
59
59
|
|
|
@@ -91,7 +91,7 @@ module ActiveStorageDedup
|
|
|
91
91
|
Rails.logger.debug "[ActiveStorageDedup] Deleted duplicate blob #{duplicate.id} record"
|
|
92
92
|
|
|
93
93
|
Rails.logger.info "[ActiveStorageDedup] ✓ Merged blob #{duplicate.id} (#{attachment_count} attachment(s)) into #{keeper.id}"
|
|
94
|
-
rescue => e
|
|
94
|
+
rescue StandardError => e
|
|
95
95
|
Rails.logger.error "[ActiveStorageDedup] ✗ Error merging blob #{duplicate.id}: #{e.class.name} - #{e.message}"
|
|
96
96
|
Rails.logger.debug "[ActiveStorageDedup] Error backtrace: #{e.backtrace.first(5).join("\n")}"
|
|
97
97
|
# Don't raise - allow job to complete for other duplicates
|
|
data/lib/tasks/active_storage_dedup.rake
CHANGED
|
@@ -7,10 +7,10 @@ namespace :active_storage_dedup do
|
|
|
7
7
|
|
|
8
8
|
# Group blobs by checksum and service_name, find groups with duplicates
|
|
9
9
|
duplicate_groups = ActiveStorage::Blob
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
10
|
+
.select("checksum, service_name, COUNT(*) as blob_count")
|
|
11
|
+
.group(:checksum, :service_name)
|
|
12
|
+
.having("COUNT(*) > 1")
|
|
13
|
+
.order("blob_count DESC")
|
|
14
14
|
|
|
15
15
|
if duplicate_groups.empty?
|
|
16
16
|
puts "No duplicate blobs found!"
|
|
@@ -22,8 +22,8 @@ namespace :active_storage_dedup do
|
|
|
22
22
|
|
|
23
23
|
duplicate_groups.each do |group|
|
|
24
24
|
blobs = ActiveStorage::Blob
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
.where(checksum: group.checksum, service_name: group.service_name)
|
|
26
|
+
.order(:created_at)
|
|
27
27
|
|
|
28
28
|
keeper = blobs.first
|
|
29
29
|
duplicates = blobs[1..]
|
|
@@ -37,7 +37,7 @@ namespace :active_storage_dedup do
|
|
|
37
37
|
puts "Filename: #{keeper.filename}"
|
|
38
38
|
puts "Total blobs: #{blobs.count}"
|
|
39
39
|
puts "Keeper blob ID: #{keeper.id} (#{keeper.attachments.count} attachments)"
|
|
40
|
-
puts "Duplicate blob IDs: #{duplicates.map(&:id).join(
|
|
40
|
+
puts "Duplicate blob IDs: #{duplicates.map(&:id).join(", ")}"
|
|
41
41
|
puts "Total attachments across duplicates: #{duplicates.sum { |b| b.attachments.count }}"
|
|
42
42
|
puts "Wasted storage: #{format_bytes(wasted_bytes)}"
|
|
43
43
|
puts "-" * 80
|
|
@@ -78,9 +78,7 @@ namespace :active_storage_dedup do
|
|
|
78
78
|
updated += 1
|
|
79
79
|
end
|
|
80
80
|
|
|
81
|
-
if (index + 1) % 100
|
|
82
|
-
puts "Processed #{index + 1}/#{total_blobs} blobs..."
|
|
83
|
-
end
|
|
81
|
+
puts "Processed #{index + 1}/#{total_blobs} blobs..." if ((index + 1) % 100).zero?
|
|
84
82
|
end
|
|
85
83
|
|
|
86
84
|
puts "\nBackfill complete!"
|
|
@@ -92,7 +90,7 @@ namespace :active_storage_dedup do
|
|
|
92
90
|
def format_bytes(bytes)
|
|
93
91
|
return "0 B" if bytes.zero?
|
|
94
92
|
|
|
95
|
-
units = [
|
|
93
|
+
units = %w[B KB MB GB TB]
|
|
96
94
|
exp = (Math.log(bytes) / Math.log(1024)).floor
|
|
97
95
|
exp = [exp, units.length - 1].min
|
|
98
96
|
|
metadata
CHANGED
|
@@ -1,42 +1,57 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: active_storage_dedup
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.0.alpha
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- coderhs
|
|
8
|
+
autorequire:
|
|
8
9
|
bindir: exe
|
|
9
10
|
cert_chain: []
|
|
10
|
-
date:
|
|
11
|
+
date: 2025-12-06 00:00:00.000000000 Z
|
|
11
12
|
dependencies:
|
|
12
13
|
- !ruby/object:Gem::Dependency
|
|
13
|
-
name:
|
|
14
|
+
name: activestorage
|
|
14
15
|
requirement: !ruby/object:Gem::Requirement
|
|
15
16
|
requirements:
|
|
16
17
|
- - ">="
|
|
17
18
|
- !ruby/object:Gem::Version
|
|
18
|
-
version:
|
|
19
|
+
version: 7.2.0
|
|
19
20
|
type: :runtime
|
|
20
21
|
prerelease: false
|
|
21
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
22
23
|
requirements:
|
|
23
24
|
- - ">="
|
|
24
25
|
- !ruby/object:Gem::Version
|
|
25
|
-
version:
|
|
26
|
+
version: 7.2.0
|
|
26
27
|
- !ruby/object:Gem::Dependency
|
|
27
|
-
name:
|
|
28
|
+
name: rails
|
|
28
29
|
requirement: !ruby/object:Gem::Requirement
|
|
29
30
|
requirements:
|
|
30
31
|
- - ">="
|
|
31
32
|
- !ruby/object:Gem::Version
|
|
32
|
-
version:
|
|
33
|
+
version: 7.2.0
|
|
33
34
|
type: :runtime
|
|
34
35
|
prerelease: false
|
|
35
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
36
37
|
requirements:
|
|
37
38
|
- - ">="
|
|
38
39
|
- !ruby/object:Gem::Version
|
|
39
|
-
version:
|
|
40
|
+
version: 7.2.0
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: combustion
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '1.3'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '1.3'
|
|
40
55
|
- !ruby/object:Gem::Dependency
|
|
41
56
|
name: rspec
|
|
42
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -66,7 +81,7 @@ dependencies:
|
|
|
66
81
|
- !ruby/object:Gem::Version
|
|
67
82
|
version: '0'
|
|
68
83
|
- !ruby/object:Gem::Dependency
|
|
69
|
-
name:
|
|
84
|
+
name: simplecov
|
|
70
85
|
requirement: !ruby/object:Gem::Requirement
|
|
71
86
|
requirements:
|
|
72
87
|
- - ">="
|
|
@@ -80,7 +95,7 @@ dependencies:
|
|
|
80
95
|
- !ruby/object:Gem::Version
|
|
81
96
|
version: '0'
|
|
82
97
|
- !ruby/object:Gem::Dependency
|
|
83
|
-
name:
|
|
98
|
+
name: sqlite3
|
|
84
99
|
requirement: !ruby/object:Gem::Requirement
|
|
85
100
|
requirements:
|
|
86
101
|
- - ">="
|
|
@@ -94,19 +109,19 @@ dependencies:
|
|
|
94
109
|
- !ruby/object:Gem::Version
|
|
95
110
|
version: '0'
|
|
96
111
|
- !ruby/object:Gem::Dependency
|
|
97
|
-
name:
|
|
112
|
+
name: version_boss
|
|
98
113
|
requirement: !ruby/object:Gem::Requirement
|
|
99
114
|
requirements:
|
|
100
|
-
- - "
|
|
115
|
+
- - ">="
|
|
101
116
|
- !ruby/object:Gem::Version
|
|
102
|
-
version: '
|
|
117
|
+
version: '0'
|
|
103
118
|
type: :development
|
|
104
119
|
prerelease: false
|
|
105
120
|
version_requirements: !ruby/object:Gem::Requirement
|
|
106
121
|
requirements:
|
|
107
|
-
- - "
|
|
122
|
+
- - ">="
|
|
108
123
|
- !ruby/object:Gem::Version
|
|
109
|
-
version: '
|
|
124
|
+
version: '0'
|
|
110
125
|
description: Prevents duplicate file uploads in Active Storage by reusing existing
|
|
111
126
|
blobs with matching checksums and service names
|
|
112
127
|
email:
|
|
@@ -144,6 +159,7 @@ metadata:
|
|
|
144
159
|
homepage_uri: https://github.com/coderhs/active_storage_dedup
|
|
145
160
|
source_code_uri: https://github.com/coderhs/active_storage_dedup
|
|
146
161
|
changelog_uri: https://github.com/coderhs/active_storage_dedup/blob/main/CHANGELOG.md
|
|
162
|
+
post_install_message:
|
|
147
163
|
rdoc_options: []
|
|
148
164
|
require_paths:
|
|
149
165
|
- lib
|
|
@@ -151,14 +167,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
151
167
|
requirements:
|
|
152
168
|
- - ">="
|
|
153
169
|
- !ruby/object:Gem::Version
|
|
154
|
-
version:
|
|
170
|
+
version: 3.1.0
|
|
155
171
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
156
172
|
requirements:
|
|
157
173
|
- - ">="
|
|
158
174
|
- !ruby/object:Gem::Version
|
|
159
175
|
version: '0'
|
|
160
176
|
requirements: []
|
|
161
|
-
rubygems_version: 3.
|
|
177
|
+
rubygems_version: 3.5.22
|
|
178
|
+
signing_key:
|
|
162
179
|
specification_version: 4
|
|
163
180
|
summary: Deduplication for Active Storage uploads
|
|
164
181
|
test_files: []
|