active_storage_dedup 1.0.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +8 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE.txt +21 -0
- data/README.md +419 -0
- data/Rakefile +12 -0
- data/lib/active_storage_dedup/attachment_extension.rb +39 -0
- data/lib/active_storage_dedup/attachment_options.rb +21 -0
- data/lib/active_storage_dedup/blob_deduplication.rb +139 -0
- data/lib/active_storage_dedup/changes_extension.rb +70 -0
- data/lib/active_storage_dedup/configuration.rb +78 -0
- data/lib/active_storage_dedup/deduplication_job.rb +100 -0
- data/lib/active_storage_dedup/railtie.rb +48 -0
- data/lib/active_storage_dedup/version.rb +5 -0
- data/lib/active_storage_dedup.rb +16 -0
- data/lib/generators/active_storage_dedup/install_generator.rb +36 -0
- data/lib/generators/active_storage_dedup/templates/README +44 -0
- data/lib/generators/active_storage_dedup/templates/add_active_storage_dedup.rb.erb +41 -0
- data/lib/generators/active_storage_dedup/templates/initializer.rb +42 -0
- data/lib/tasks/active_storage_dedup.rake +101 -0
- data/sig/active_storage_dedup.rbs +4 -0
- metadata +164 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveStorageDedup
  # Class-level hooks for ActiveStorage::Blob that return an already persisted
  # blob instead of a new one when a blob with the same checksum, byte size and
  # service already exists.
  #
  # The methods live in the +ClassMethods+ module generated by +class_methods+
  # and rely on +super+ to reach the original ActiveStorage implementation, so
  # that module is meant to be prepended to +ActiveStorage::Blob.singleton_class+.
  #
  # The +__dedup_record+ / +__dedup_attachment_name+ keywords carry the owning
  # record and attachment name so per-attachment settings can be honoured; they
  # are never forwarded to ActiveStorage.
  module BlobDeduplication
    extend ActiveSupport::Concern

    class_methods do
      # PRIMARY HOOK: Rails 6.1+ form uploads via Changes::CreateOne.
      # Called by ActiveStorage::Attached::Changes::CreateOne#find_or_build_blob.
      #
      # Builds the blob through the original implementation (which computes the
      # checksum) and, when deduplication applies, swaps it for a matching
      # persisted blob.
      #
      # @return [ActiveStorage::Blob] an existing persisted blob when a duplicate
      #   is found, otherwise the freshly built (unsaved) blob
      def build_after_unfurling(io:, filename:, content_type: nil, metadata: nil,
                                service_name: nil, identify: true,
                                __dedup_record: nil, __dedup_attachment_name: nil, **options)
        Rails.logger.debug "[ActiveStorageDedup] build_after_unfurling called for #{filename}"
        Rails.logger.debug "[ActiveStorageDedup] Context: record=#{__dedup_record&.class&.name}, attachment=#{__dedup_attachment_name}"

        # Build the blob using the original method to get the checksum computed
        blob = super(
          io: io, filename: filename, content_type: content_type,
          metadata: metadata, service_name: service_name, identify: identify, **options
        )

        Rails.logger.debug "[ActiveStorageDedup] Blob built with checksum: #{dedup_checksum_prefix(blob.checksum)}..."

        should_dedup = should_deduplicate?(__dedup_record, __dedup_attachment_name)
        Rails.logger.debug "[ActiveStorageDedup] Deduplication enabled: #{should_dedup}"

        if should_dedup && blob.checksum
          actual_service_name = blob.service_name || service.name
          Rails.logger.debug "[ActiveStorageDedup] Checking for duplicates: checksum=#{dedup_checksum_prefix(blob.checksum)}..., service=#{actual_service_name}"

          existing_blob = dedup_find_existing_blob(
            checksum: blob.checksum, byte_size: blob.byte_size, service_name: actual_service_name
          )
          if existing_blob
            Rails.logger.info "[ActiveStorageDedup] ✓ Reusing existing blob #{existing_blob.id} (checksum: #{dedup_checksum_prefix(blob.checksum)}..., service: #{actual_service_name})"
            return existing_blob
          end

          Rails.logger.debug "[ActiveStorageDedup] No duplicate found, will use new blob"
        end

        Rails.logger.info "[ActiveStorageDedup] Creating new blob for #{filename} (checksum: #{dedup_checksum_prefix(blob.checksum)}...)" if should_dedup
        blob
      end

      # HOOK 2: Direct uploads to cloud storage.
      #
      # The checksum and byte size are supplied by the client, so no IO is read
      # here. Falls through to the original implementation when deduplication is
      # disabled or when no usable checksum was supplied.
      #
      # NOTE(review): because the lookup is keyed on client-supplied values, a
      # client that knows another file's checksum and size may be handed that
      # blob without possessing its content - confirm this is acceptable for
      # the deployment.
      #
      # @return [ActiveStorage::Blob] the matching persisted blob or a new one
      def create_before_direct_upload!(key: nil, filename:, byte_size:, checksum:,
                                       content_type: nil, metadata: nil,
                                       service_name: nil,
                                       __dedup_record: nil, __dedup_attachment_name: nil, **options)
        Rails.logger.debug "[ActiveStorageDedup] create_before_direct_upload! called for #{filename}"
        Rails.logger.debug "[ActiveStorageDedup] Context: record=#{__dedup_record&.class&.name}, attachment=#{__dedup_attachment_name}"
        Rails.logger.debug "[ActiveStorageDedup] Checksum provided by client: #{dedup_checksum_prefix(checksum)}..."

        should_dedup = should_deduplicate?(__dedup_record, __dedup_attachment_name)
        Rails.logger.debug "[ActiveStorageDedup] Deduplication enabled: #{should_dedup}"

        blob_arguments = {
          key: key, filename: filename, byte_size: byte_size, checksum: checksum,
          content_type: content_type, metadata: metadata,
          service_name: service_name, **options
        }

        unless should_dedup
          Rails.logger.debug "[ActiveStorageDedup] Deduplication disabled, creating new blob"
          return super(**blob_arguments)
        end

        # Without a checksum there is nothing to match on; let ActiveStorage
        # handle (and validate) the request as it normally would.
        if checksum.to_s.empty?
          Rails.logger.debug "[ActiveStorageDedup] No checksum provided, creating new blob"
          return super(**blob_arguments)
        end

        actual_service_name = service_name || service.name
        Rails.logger.debug "[ActiveStorageDedup] Checking for duplicates: checksum=#{dedup_checksum_prefix(checksum)}..., service=#{actual_service_name}"

        existing_blob = dedup_find_existing_blob(
          checksum: checksum, byte_size: byte_size, service_name: actual_service_name
        )
        if existing_blob
          Rails.logger.info "[ActiveStorageDedup] ✓ Reusing existing blob #{existing_blob.id} for direct upload (checksum: #{dedup_checksum_prefix(checksum)}..., service: #{actual_service_name})"
          return existing_blob
        end

        Rails.logger.debug "[ActiveStorageDedup] No duplicate found, creating new blob"
        new_blob = super(**blob_arguments)
        Rails.logger.info "[ActiveStorageDedup] Created new blob #{new_blob.id} for direct upload #{filename}"
        new_blob
      end

      # HOOK 3: Fallback for programmatic attach (record.file.attach(io: ...)).
      #
      # When deduplication applies, the blob is built through
      # +build_after_unfurling+ (which performs the duplicate lookup) and saved
      # only if it is new. A caller-supplied +key+ and any extra options are
      # forwarded to the build step rather than dropped.
      #
      # @return [ActiveStorage::Blob] a persisted blob (existing or newly saved)
      # @raise [ActiveRecord::RecordInvalid] from +save!+ when the new blob is invalid
      def create_after_unfurling!(key: nil, io:, filename:, content_type: nil,
                                  metadata: nil, service_name: nil, identify: true,
                                  __dedup_record: nil, __dedup_attachment_name: nil, **options)
        Rails.logger.debug "[ActiveStorageDedup] create_after_unfurling! called for #{filename}"
        Rails.logger.debug "[ActiveStorageDedup] Context: record=#{__dedup_record&.class&.name}, attachment=#{__dedup_attachment_name}"

        should_dedup = should_deduplicate?(__dedup_record, __dedup_attachment_name)
        Rails.logger.debug "[ActiveStorageDedup] Deduplication enabled: #{should_dedup}"

        unless should_dedup
          Rails.logger.debug "[ActiveStorageDedup] Deduplication disabled, creating new blob"
          return super(
            key: key, io: io, filename: filename, content_type: content_type,
            metadata: metadata, service_name: service_name, identify: identify, **options
          )
        end

        # Only forward +key+ when the caller actually supplied one, so the build
        # step receives exactly the keywords the caller intended.
        forwarded_options = key.nil? ? options : options.merge(key: key)

        Rails.logger.debug "[ActiveStorageDedup] Building blob to compute checksum..."
        # Build blob first to get checksum (but don't save yet)
        blob = build_after_unfurling(
          io: io, filename: filename, content_type: content_type,
          metadata: metadata, service_name: service_name, identify: identify,
          __dedup_record: __dedup_record, __dedup_attachment_name: __dedup_attachment_name,
          **forwarded_options
        )

        # If build_after_unfurling returned an existing blob, just return it
        if blob.persisted?
          Rails.logger.debug "[ActiveStorageDedup] build_after_unfurling returned existing blob #{blob.id}"
          return blob
        end

        Rails.logger.debug "[ActiveStorageDedup] Saving new blob..."
        blob.save!
        Rails.logger.info "[ActiveStorageDedup] Created and saved new blob #{blob.id} for #{filename}"
        blob
      end

      private

      # Decides whether deduplication applies: the global switch when no
      # record/attachment context is available, the per-attachment setting
      # otherwise.
      def should_deduplicate?(record, attachment_name)
        return ActiveStorageDedup.enabled? unless record && attachment_name

        ActiveStorageDedup.deduplicate_enabled_for?(record, attachment_name)
      end

      # Finds a persisted blob with the same checksum on the same service.
      # The byte size is part of the match whenever it is known, so two files
      # of different length are never treated as the same content.
      def dedup_find_existing_blob(checksum:, byte_size:, service_name:)
        conditions = { checksum: checksum, service_name: service_name }
        conditions[:byte_size] = byte_size unless byte_size.nil?
        find_by(conditions)
      end

      # Short, log-friendly prefix of a checksum (nil-safe).
      def dedup_checksum_prefix(checksum)
        checksum&.slice(0, 12)
      end
    end
  end
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveStorageDedup
  # Replacement for ActiveStorage::Attached::Changes::CreateOne#find_or_build_blob
  # that hands the owning record and the attachment name to
  # ActiveStorage::Blob.build_after_unfurling through the +__dedup_record+ and
  # +__dedup_attachment_name+ keywords.
  module ChangesExtension
    extend ActiveSupport::Concern

    # Resolves +attachable+ into a blob.
    #
    # Blobs are returned untouched and signed-id strings are looked up; every
    # other supported attachable is turned into keyword arguments for
    # ActiveStorage::Blob.build_after_unfurling.
    #
    # @raise [ArgumentError] when +attachable+ is of an unsupported type
    def find_or_build_blob
      Rails.logger.debug "[ActiveStorageDedup] ChangesExtension#find_or_build_blob called for #{name} attachment"
      Rails.logger.debug "[ActiveStorageDedup] Attachable type: #{attachable.class.name}"

      file_attributes =
        case attachable
        when ActiveStorage::Blob
          return attachable
        when ActionDispatch::Http::UploadedFile
          {
            io: attachable.open,
            filename: attachable.original_filename,
            content_type: attachable.content_type
          }
        when Rack::Test::UploadedFile
          {
            io: attachable.respond_to?(:open) ? attachable.open : attachable,
            filename: attachable.original_filename,
            content_type: attachable.content_type
          }
        when Hash
          # Caller-supplied keys win over the record/service defaults; the
          # dedup context is always set last.
          hash_attributes = attachable
                            .reverse_merge(record: record, service_name: attachment_service_name)
                            .symbolize_keys
                            .merge(__dedup_record: record, __dedup_attachment_name: name)
          return ActiveStorage::Blob.build_after_unfurling(**hash_attributes)
        when String
          return ActiveStorage::Blob.find_signed!(attachable, record: record)
        when File
          { io: attachable, filename: File.basename(attachable) }
        when Pathname
          { io: attachable.open, filename: File.basename(attachable) }
        else
          raise(
            ArgumentError,
            "Could not find or build blob: expected attachable, " \
            "got #{attachable.inspect}"
          )
        end

      ActiveStorage::Blob.build_after_unfurling(
        **file_attributes,
        __dedup_record: record,
        __dedup_attachment_name: name,
        service_name: attachment_service_name
      )
    end
  end
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveStorageDedup
  # Holds the gem-wide settings. All three switches default to +true+.
  class Configuration
    # Master switch to enable/disable the entire gem (default: true).
    # If false, no deduplication or lifecycle management will occur at all.
    attr_accessor :enabled

    # Default deduplication setting for attachments when the gem is enabled
    # (default: true). Can be overridden per attachment using the
    # +deduplicate:+ option. Only applies when +enabled+ is true.
    attr_accessor :deduplicate_by_default

    # Automatically purge orphaned blobs when reference_count reaches 0
    # (default: true). Only applies when +enabled+ is true.
    attr_accessor :auto_purge_orphans

    def initialize
      @enabled = true
      @deduplicate_by_default = true
      @auto_purge_orphans = true
      ActiveStorageDedup.log(:debug) do
        "Configuration initialized with defaults: enabled=#{@enabled}, deduplicate_by_default=#{@deduplicate_by_default}, auto_purge_orphans=#{@auto_purge_orphans}"
      end
    end
  end

  class << self
    attr_writer :configuration

    # @return [Configuration] the current configuration, created on first use
    def configuration
      @configuration ||= Configuration.new
    end

    # Yields the configuration object for modification.
    #
    # @yieldparam config [Configuration]
    def configure
      log(:debug) { "Configuring ActiveStorageDedup..." }
      yield(configuration)
      log(:info) do
        "Configuration updated: enabled=#{configuration.enabled}, deduplicate_by_default=#{configuration.deduplicate_by_default}, auto_purge_orphans=#{configuration.auto_purge_orphans}"
      end
    end

    # @return [Boolean] value of the master switch
    def enabled?
      configuration.enabled
    end

    # Per-attachment settings keyed by "ModelName#attachment_name".
    #
    # @return [Hash{String => Hash}]
    def attachment_settings
      @attachment_settings ||= {}
    end

    # Records an explicit +deduplicate:+ choice for one attachment of a model.
    #
    # @param model_name [String] name of the model class declaring the attachment
    # @param attachment_name [Symbol, String]
    # @param deduplicate [Boolean]
    def register_attachment(model_name, attachment_name, deduplicate:)
      key = "#{model_name}##{attachment_name}"
      attachment_settings[key] = { deduplicate: deduplicate }
      log(:debug) { "Registered attachment #{key} with deduplicate=#{deduplicate}" }
    end

    # Tells whether deduplication applies to +attachment_name+ on +record+.
    #
    # Precedence: the master switch, then an explicit setting registered for the
    # record's class or the nearest ancestor class that registered one (so
    # subclasses inherit their parent's setting), then
    # +configuration.deduplicate_by_default+.
    #
    # @param record [Object] the record owning the attachment
    # @param attachment_name [Symbol, String]
    # @return [Boolean]
    def deduplicate_enabled_for?(record, attachment_name)
      unless configuration.enabled
        log(:debug) { "Gem is disabled globally (enabled=false)" }
        return false
      end

      key = "#{record.class.name}##{attachment_name}"
      settings = attachment_settings_for(record.class, attachment_name)

      if settings.nil?
        result = configuration.deduplicate_by_default
        log(:debug) { "Deduplication check for #{key}: #{result} (using deduplicate_by_default)" }
      else
        result = settings[:deduplicate]
        log(:debug) { "Deduplication check for #{key}: #{result} (model-level override)" }
      end

      result
    end

    # Writes a prefixed message to the Rails logger when one is available.
    # Does nothing outside Rails or while +Rails.logger+ is still nil; the
    # message block is only evaluated when a logger exists.
    #
    # @api private
    # @param level [Symbol] logger method to call (:debug, :info, ...)
    # @yieldreturn [String] the message without the "[ActiveStorageDedup]" prefix
    def log(level)
      return unless defined?(::Rails) && ::Rails.respond_to?(:logger)

      ::Rails.logger&.public_send(level, "[ActiveStorageDedup] #{yield}")
    end

    private

    # Walks up the class hierarchy and returns the first registered settings
    # hash for +attachment_name+, or nil when no class in the chain has one.
    def attachment_settings_for(klass, attachment_name)
      while klass
        settings = attachment_settings["#{klass.name}##{attachment_name}"]
        return settings if settings

        klass = klass.superclass
      end

      nil
    end
  end
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveStorageDedup
  # Sanity-check job that finds blobs sharing the same checksum and service and
  # merges each group into its oldest blob.
  #
  # Can be run on demand or scheduled (daily/weekly) to clean up duplicates that
  # slipped through, e.g. due to race conditions.
  #
  # @example Run manually
  #   ActiveStorageDedup::DeduplicationJob.perform_now
  #
  # @example Schedule with the whenever gem
  #   every 1.day, at: '2:00 am' do
  #     runner "ActiveStorageDedup::DeduplicationJob.perform_later"
  #   end
  class DeduplicationJob < ActiveJob::Base
    queue_as :default

    # Scans all blobs for duplicate (checksum, service_name) groups and merges
    # them. Blobs without a checksum are ignored: a missing checksum says
    # nothing about the content being equal.
    def perform
      Rails.logger.info "[ActiveStorageDedup] 🔍 Starting sanity check - scanning for duplicate blobs..."

      # { [checksum, service_name] => count } for every group with duplicates
      duplicate_groups = ActiveStorage::Blob
                         .where.not(checksum: nil)
                         .group(:checksum, :service_name)
                         .having("COUNT(*) > 1")
                         .count

      if duplicate_groups.empty?
        Rails.logger.info "[ActiveStorageDedup] ✓ No duplicate blobs found - database is clean!"
        return
      end

      Rails.logger.info "[ActiveStorageDedup] Found #{duplicate_groups.size} group(s) with duplicates"

      total_merged = duplicate_groups.keys.sum do |checksum, service_name|
        process_duplicate_group(checksum, service_name)
      end

      Rails.logger.info "[ActiveStorageDedup] ✓ Sanity check complete - merged #{total_merged} duplicate blob(s)"
    end

    private

    # Merges every blob of one group into the oldest blob of that group.
    #
    # @return [Integer] number of duplicates that were actually merged
    def process_duplicate_group(checksum, service_name)
      Rails.logger.debug "[ActiveStorageDedup] Processing duplicate group: checksum=#{checksum[0..12]}..., service=#{service_name}"

      # Oldest first; id breaks ties between identical timestamps.
      keeper, *duplicates = ActiveStorage::Blob
                            .where(checksum: checksum, service_name: service_name)
                            .order(:created_at, :id)
                            .to_a

      Rails.logger.debug "[ActiveStorageDedup] Found #{duplicates.size + (keeper ? 1 : 0)} blob(s) with checksum #{checksum[0..12]}..."

      # The group may have shrunk since it was counted.
      return 0 if keeper.nil? || duplicates.empty?

      Rails.logger.info "[ActiveStorageDedup] 🔄 Merging #{duplicates.size} duplicate(s) into blob #{keeper.id} (checksum: #{checksum[0..12]}...)"

      duplicates.count { |duplicate_blob| merge_duplicate(keeper, duplicate_blob) }
    end

    # Moves all attachments of +duplicate+ to +keeper+, adjusts the keeper's
    # reference_count and removes the duplicate.
    #
    # The database changes happen in one transaction so a failure cannot leave
    # attachments moved without the counter being updated. Errors are logged and
    # swallowed so the job can continue with the remaining duplicates.
    #
    # @return [Boolean] true when the duplicate row was merged and removed
    def merge_duplicate(keeper, duplicate)
      Rails.logger.debug "[ActiveStorageDedup] Merging blob #{duplicate.id} into keeper #{keeper.id}..."

      attachment_count = 0
      ActiveStorage::Blob.transaction do
        # update_all returns the number of rows changed, which is exactly the
        # amount the keeper's counter has to grow by (counter caches are not
        # touched by update_all).
        attachment_count = duplicate.attachments.update_all(blob_id: keeper.id)
        Rails.logger.debug "[ActiveStorageDedup] Moving #{attachment_count} attachment(s) from blob #{duplicate.id} to #{keeper.id}"

        keeper.increment!(:reference_count, attachment_count) if attachment_count.positive?
        Rails.logger.debug "[ActiveStorageDedup] Updated keeper #{keeper.id} reference_count to #{keeper.reference_count}"

        # ActiveStorage::Blob#delete only removes the stored file, so the row
        # has to be destroyed explicitly.
        duplicate.destroy!
      end
      Rails.logger.debug "[ActiveStorageDedup] Deleted duplicate blob #{duplicate.id} record"

      remove_duplicate_file(duplicate)

      Rails.logger.info "[ActiveStorageDedup] ✓ Merged blob #{duplicate.id} (#{attachment_count} attachment(s)) into #{keeper.id}"
      true
    rescue StandardError => e
      Rails.logger.error "[ActiveStorageDedup] ✗ Error merging blob #{duplicate.id}: #{e.class.name} - #{e.message}"
      Rails.logger.debug "[ActiveStorageDedup] Error backtrace: #{e.backtrace&.first(5)&.join("\n")}"
      # Don't raise - allow job to complete for other duplicates
      false
    end

    # Removes the duplicate's stored file after its row is gone. Runs outside
    # the transaction because it talks to the storage service; a failure here is
    # logged but does not undo the completed merge.
    def remove_duplicate_file(duplicate)
      duplicate.delete
    rescue StandardError => e
      Rails.logger.error "[ActiveStorageDedup] ✗ Error deleting file of merged blob #{duplicate.id}: #{e.class.name} - #{e.message}"
    end
  end
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ActiveStorageDedup
  # Wires the gem into a Rails application once initialization has finished.
  class Railtie < ::Rails::Railtie
    config.after_initialize do
      Rails.logger.info "[ActiveStorageDedup] Initializing ActiveStorageDedup gem..."

      # ActiveStorage::Blob: the deduplication hooks are class methods, so they
      # are prepended to the singleton class; prepending lets them reach the
      # original implementations through `super`.
      ActiveSupport.on_load(:active_storage_blob) do
        Rails.logger.debug "[ActiveStorageDedup] Patching ActiveStorage::Blob with BlobDeduplication module"
        blob_hooks = ActiveStorageDedup::BlobDeduplication::ClassMethods
        ActiveStorage::Blob.singleton_class.prepend(blob_hooks)
        Rails.logger.debug "[ActiveStorageDedup] ✓ ActiveStorage::Blob patched successfully"
      end

      # ActiveStorage::Attachment: mix in AttachmentExtension.
      ActiveSupport.on_load(:active_storage_attachment) do
        Rails.logger.debug "[ActiveStorageDedup] Patching ActiveStorage::Attachment with AttachmentExtension module"
        include(ActiveStorageDedup::AttachmentExtension)
        Rails.logger.debug "[ActiveStorageDedup] ✓ ActiveStorage::Attachment patched successfully"
      end

      # Changes::CreateOne: pass record/attachment context to the blob hooks,
      # but only where that class exists.
      create_one_available = defined?(ActiveStorage::Attached::Changes::CreateOne)
      if create_one_available
        Rails.logger.debug "[ActiveStorageDedup] Patching ActiveStorage::Attached::Changes::CreateOne with ChangesExtension module"
        ActiveStorage::Attached::Changes::CreateOne.prepend(ActiveStorageDedup::ChangesExtension)
        Rails.logger.debug "[ActiveStorageDedup] ✓ ActiveStorage::Attached::Changes::CreateOne patched successfully"
      else
        Rails.logger.debug "[ActiveStorageDedup] ActiveStorage::Attached::Changes::CreateOne not found, skipping patch"
      end

      # ActiveRecord: extend with AttachmentOptions.
      ActiveSupport.on_load(:active_record) do
        Rails.logger.debug "[ActiveStorageDedup] Extending ActiveRecord with AttachmentOptions module"
        extend(ActiveStorageDedup::AttachmentOptions)
        Rails.logger.debug "[ActiveStorageDedup] ✓ ActiveRecord extended successfully"
      end

      Rails.logger.info "[ActiveStorageDedup] ✓ Gem initialization complete"
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "active_storage_dedup/version"
|
|
4
|
+
require_relative "active_storage_dedup/configuration"
|
|
5
|
+
require_relative "active_storage_dedup/blob_deduplication"
|
|
6
|
+
require_relative "active_storage_dedup/changes_extension"
|
|
7
|
+
require_relative "active_storage_dedup/attachment_options"
|
|
8
|
+
require_relative "active_storage_dedup/attachment_extension"
|
|
9
|
+
|
|
10
|
+
module ActiveStorageDedup
  # The background job is registered for autoloading, so its file is only
  # required the first time the constant is referenced.
  autoload(:DeduplicationJob, "active_storage_dedup/deduplication_job")
end
|
|
14
|
+
|
|
15
|
+
# Load Railtie for Rails integration
|
|
16
|
+
require_relative "active_storage_dedup/railtie" if defined?(Rails::Railtie)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rails/generators"
|
|
4
|
+
require "rails/generators/migration"
|
|
5
|
+
|
|
6
|
+
module ActiveStorageDedup
  module Generators
    # Install generator: writes the initializer, adds the migration and prints
    # the post-install README.
    class InstallGenerator < ::Rails::Generators::Base
      include Rails::Generators::Migration

      source_root File.expand_path("templates", __dir__)

      desc "Creates ActiveStorageDedup initializer and migration files"

      # Migration number following the highest one currently found in +path+.
      def self.next_migration_number(path)
        following_number = current_migration_number(path) + 1
        ActiveRecord::Migration.next_migration_number(following_number)
      end

      # Step 1: config/initializers/active_storage_dedup.rb
      def copy_initializer
        template("initializer.rb", "config/initializers/active_storage_dedup.rb")
      end

      # Step 2: db/migrate/<number>_add_active_storage_dedup.rb
      def copy_migrations
        migration_template("add_active_storage_dedup.rb.erb", "db/migrate/add_active_storage_dedup.rb")
      end

      # Step 3: show the README, but only when generating (not when revoking).
      def show_readme
        return unless behavior == :invoke

        readme("README")
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
===============================================================================
|
|
2
|
+
|
|
3
|
+
ActiveStorageDedup has been installed!
|
|
4
|
+
|
|
5
|
+
Next steps:
|
|
6
|
+
|
|
7
|
+
1. Run the migration:
|
|
8
|
+
rails db:migrate
|
|
9
|
+
|
|
10
|
+
2. Review the configuration in config/initializers/active_storage_dedup.rb
|
|
11
|
+
The initializer has been created with sensible defaults.
|
|
12
|
+
|
|
13
|
+
3. Optional: Control deduplication per-attachment:
|
|
14
|
+
|
|
15
|
+
class Product < ApplicationRecord
|
|
16
|
+
has_many_attached :images # Uses config.deduplicate_by_default
|
|
17
|
+
has_one_attached :unique_badge, deduplicate: false # Override: don't deduplicate
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
4. Optional: Backfill reference_count for existing blobs:
|
|
21
|
+
rails active_storage_dedup:backfill_reference_count
|
|
22
|
+
|
|
23
|
+
5. Optional: Schedule sanity check job (recommended weekly/monthly):
|
|
24
|
+
|
|
25
|
+
# Run manually
|
|
26
|
+
ActiveStorageDedup::DeduplicationJob.perform_now
|
|
27
|
+
|
|
28
|
+
# Or via rake task
|
|
29
|
+
rails active_storage_dedup:cleanup_all
|
|
30
|
+
|
|
31
|
+
# Or schedule it, e.g. with the whenever gem (config/schedule.rb)
|
|
32
|
+
every 1.week do
|
|
33
|
+
runner "ActiveStorageDedup::DeduplicationJob.perform_later"
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
Available rake tasks:
|
|
37
|
+
- rails active_storage_dedup:report_duplicates
|
|
38
|
+
- rails active_storage_dedup:cleanup_all
|
|
39
|
+
- rails active_storage_dedup:backfill_reference_count
|
|
40
|
+
|
|
41
|
+
For more information, visit:
|
|
42
|
+
https://github.com/yourusername/active_storage_dedup
|
|
43
|
+
|
|
44
|
+
===============================================================================
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
class AddActiveStorageDedup < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
|
|
2
|
+
def up
|
|
3
|
+
# Add reference_count column to active_storage_blobs
|
|
4
|
+
unless column_exists?(:active_storage_blobs, :reference_count)
|
|
5
|
+
add_column :active_storage_blobs, :reference_count, :integer, default: 0, null: false
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
# Add composite index on checksum and service_name for deduplication lookups
|
|
9
|
+
unless index_exists?(:active_storage_blobs, [:checksum, :service_name])
|
|
10
|
+
add_index :active_storage_blobs, [:checksum, :service_name], name: "index_active_storage_blobs_on_checksum_and_service"
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
# Backfill reference_count with actual attachment counts
|
|
14
|
+
# Using SQL for efficiency with large datasets
|
|
15
|
+
execute <<-SQL.squish
|
|
16
|
+
UPDATE active_storage_blobs
|
|
17
|
+
SET reference_count = (
|
|
18
|
+
SELECT COUNT(*)
|
|
19
|
+
FROM active_storage_attachments
|
|
20
|
+
WHERE active_storage_attachments.blob_id = active_storage_blobs.id
|
|
21
|
+
)
|
|
22
|
+
SQL
|
|
23
|
+
|
|
24
|
+
puts "ActiveStorageDedup migration complete!"
|
|
25
|
+
puts " - Added reference_count column to active_storage_blobs"
|
|
26
|
+
puts " - Added composite index on [checksum, service_name]"
|
|
27
|
+
puts " - Backfilled reference_count for #{ActiveStorage::Blob.count} existing blobs"
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def down
|
|
31
|
+
# Remove index
|
|
32
|
+
if index_exists?(:active_storage_blobs, [:checksum, :service_name])
|
|
33
|
+
remove_index :active_storage_blobs, name: "index_active_storage_blobs_on_checksum_and_service"
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Remove column
|
|
37
|
+
if column_exists?(:active_storage_blobs, :reference_count)
|
|
38
|
+
remove_column :active_storage_blobs, :reference_count
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
ActiveStorageDedup.configure do |config|
  # Master switch for the whole gem (default: true).
  # When false, neither deduplication nor lifecycle management takes place.
  config.enabled = true

  # Whether attachments deduplicate when they do not say otherwise (default: true).
  # Only consulted while `enabled` is true. Override per attachment with e.g.
  #   has_many_attached :images, deduplicate: false
  config.deduplicate_by_default = true

  # Whether blobs are removed automatically once their reference_count drops
  # to 0, i.e. no attachment references them any more (default: true).
  config.auto_purge_orphans = true
end
|
|
20
|
+
|
|
21
|
+
# Usage Examples:
|
|
22
|
+
#
|
|
23
|
+
# Opt-out pattern (deduplicate everything except specific attachments):
|
|
24
|
+
# config.enabled = true
|
|
25
|
+
# config.deduplicate_by_default = true
|
|
26
|
+
#
|
|
27
|
+
# class Product < ApplicationRecord
|
|
28
|
+
# has_many_attached :images # Deduplicates (uses default)
|
|
29
|
+
# has_one_attached :unique_badge, deduplicate: false # Does NOT deduplicate (override)
|
|
30
|
+
# end
|
|
31
|
+
#
|
|
32
|
+
# Opt-in pattern (only deduplicate specific attachments):
|
|
33
|
+
# config.enabled = true
|
|
34
|
+
# config.deduplicate_by_default = false
|
|
35
|
+
#
|
|
36
|
+
# class Product < ApplicationRecord
|
|
37
|
+
# has_many_attached :images, deduplicate: true # Deduplicates (override)
|
|
38
|
+
# has_one_attached :avatar # Does NOT deduplicate (uses default)
|
|
39
|
+
# end
|
|
40
|
+
#
|
|
41
|
+
# Disable gem entirely (useful for development/testing):
|
|
42
|
+
# config.enabled = false
|