active_storage_dedup 1.0.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveStorageDedup
4
+ module BlobDeduplication
5
+ extend ActiveSupport::Concern
6
+
7
+ class_methods do
8
+ # PRIMARY HOOK: Rails 6.1+ form uploads via Changes::CreateOne
9
+ # This is called by ActiveStorage::Attached::Changes::CreateOne#find_or_build_blob
10
+ def build_after_unfurling(io:, filename:, content_type: nil, metadata: nil,
11
+ service_name: nil, identify: true,
12
+ __dedup_record: nil, __dedup_attachment_name: nil, **options)
13
+ Rails.logger.debug "[ActiveStorageDedup] build_after_unfurling called for #{filename}"
14
+ Rails.logger.debug "[ActiveStorageDedup] Context: record=#{__dedup_record&.class&.name}, attachment=#{__dedup_attachment_name}"
15
+
16
+ # Build the blob using the original method to get the checksum computed
17
+ blob = super(
18
+ io: io, filename: filename, content_type: content_type,
19
+ metadata: metadata, service_name: service_name, identify: identify, **options
20
+ )
21
+
22
+ Rails.logger.debug "[ActiveStorageDedup] Blob built with checksum: #{blob.checksum&.slice(0, 12)}..."
23
+
24
+ # Check if deduplication enabled for this attachment
25
+ should_dedup = should_deduplicate?(__dedup_record, __dedup_attachment_name)
26
+ Rails.logger.debug "[ActiveStorageDedup] Deduplication enabled: #{should_dedup}"
27
+
28
+ # Check if a blob with this checksum already exists
29
+ if should_dedup && blob.checksum
30
+ actual_service_name = blob.service_name || service.name
31
+ Rails.logger.debug "[ActiveStorageDedup] Checking for duplicates: checksum=#{blob.checksum[0..12]}..., service=#{actual_service_name}"
32
+
33
+ if existing_blob = find_by(checksum: blob.checksum, service_name: actual_service_name)
34
+ Rails.logger.info "[ActiveStorageDedup] ✓ Reusing existing blob #{existing_blob.id} (checksum: #{blob.checksum[0..12]}..., service: #{actual_service_name})"
35
+ return existing_blob
36
+ end
37
+
38
+ Rails.logger.debug "[ActiveStorageDedup] No duplicate found, will use new blob"
39
+ end
40
+
41
+ Rails.logger.info "[ActiveStorageDedup] Creating new blob for #{filename} (checksum: #{blob.checksum&.slice(0, 12)}...)" if should_dedup
42
+ blob
43
+ end
44
+
45
+ # HOOK 2: Direct uploads to cloud storage
46
+ def create_before_direct_upload!(key: nil, filename:, byte_size:, checksum:,
47
+ content_type: nil, metadata: nil,
48
+ service_name: nil,
49
+ __dedup_record: nil, __dedup_attachment_name: nil, **options)
50
+ Rails.logger.debug "[ActiveStorageDedup] create_before_direct_upload! called for #{filename}"
51
+ Rails.logger.debug "[ActiveStorageDedup] Context: record=#{__dedup_record&.class&.name}, attachment=#{__dedup_attachment_name}"
52
+ Rails.logger.debug "[ActiveStorageDedup] Checksum provided by client: #{checksum&.slice(0, 12)}..."
53
+
54
+ # Check if deduplication enabled
55
+ should_dedup = should_deduplicate?(__dedup_record, __dedup_attachment_name)
56
+ Rails.logger.debug "[ActiveStorageDedup] Deduplication enabled: #{should_dedup}"
57
+
58
+ unless should_dedup
59
+ Rails.logger.debug "[ActiveStorageDedup] Deduplication disabled, creating new blob"
60
+ return super(
61
+ key: key, filename: filename, byte_size: byte_size, checksum: checksum,
62
+ content_type: content_type, metadata: metadata,
63
+ service_name: service_name, **options
64
+ )
65
+ end
66
+
67
+ # Checksum already provided by client
68
+ actual_service_name = service_name || service.name
69
+ Rails.logger.debug "[ActiveStorageDedup] Checking for duplicates: checksum=#{checksum[0..12]}..., service=#{actual_service_name}"
70
+
71
+ # Check for existing blob
72
+ if existing_blob = find_by(checksum: checksum, service_name: actual_service_name)
73
+ Rails.logger.info "[ActiveStorageDedup] ✓ Reusing existing blob #{existing_blob.id} for direct upload (checksum: #{checksum[0..12]}..., service: #{actual_service_name})"
74
+ return existing_blob
75
+ end
76
+
77
+ Rails.logger.debug "[ActiveStorageDedup] No duplicate found, creating new blob"
78
+ # No duplicate - create new blob
79
+ new_blob = super(
80
+ key: key, filename: filename, byte_size: byte_size, checksum: checksum,
81
+ content_type: content_type, metadata: metadata,
82
+ service_name: service_name, **options
83
+ )
84
+ Rails.logger.info "[ActiveStorageDedup] Created new blob #{new_blob.id} for direct upload #{filename}"
85
+ new_blob
86
+ end
87
+
88
+ # HOOK 3: Fallback for programmatic attach (record.file.attach(io: ...))
89
+ def create_after_unfurling!(key: nil, io:, filename:, content_type: nil,
90
+ metadata: nil, service_name: nil, identify: true,
91
+ __dedup_record: nil, __dedup_attachment_name: nil, **options)
92
+ Rails.logger.debug "[ActiveStorageDedup] create_after_unfurling! called for #{filename}"
93
+ Rails.logger.debug "[ActiveStorageDedup] Context: record=#{__dedup_record&.class&.name}, attachment=#{__dedup_attachment_name}"
94
+
95
+ # Check if deduplication enabled
96
+ should_dedup = should_deduplicate?(__dedup_record, __dedup_attachment_name)
97
+ Rails.logger.debug "[ActiveStorageDedup] Deduplication enabled: #{should_dedup}"
98
+
99
+ unless should_dedup
100
+ Rails.logger.debug "[ActiveStorageDedup] Deduplication disabled, creating new blob"
101
+ return super(
102
+ key: key, io: io, filename: filename, content_type: content_type,
103
+ metadata: metadata, service_name: service_name, identify: identify, **options
104
+ )
105
+ end
106
+
107
+ Rails.logger.debug "[ActiveStorageDedup] Building blob to compute checksum..."
108
+ # Build blob first to get checksum (but don't save yet)
109
+ blob = build_after_unfurling(
110
+ io: io, filename: filename, content_type: content_type,
111
+ metadata: metadata, service_name: service_name, identify: identify,
112
+ __dedup_record: __dedup_record, __dedup_attachment_name: __dedup_attachment_name
113
+ )
114
+
115
+ # If build_after_unfurling returned an existing blob, just return it
116
+ if blob.persisted?
117
+ Rails.logger.debug "[ActiveStorageDedup] build_after_unfurling returned existing blob #{blob.id}"
118
+ return blob
119
+ end
120
+
121
+ # Otherwise save the new blob
122
+ Rails.logger.debug "[ActiveStorageDedup] Saving new blob..."
123
+ blob.save!
124
+ Rails.logger.info "[ActiveStorageDedup] Created and saved new blob #{blob.id} for #{filename}"
125
+ blob
126
+ end
127
+
128
+ private
129
+
130
+ def should_deduplicate?(record, attachment_name)
131
+ # If no context, use global setting
132
+ return ActiveStorageDedup.enabled? unless record && attachment_name
133
+
134
+ # Check per-attachment setting
135
+ ActiveStorageDedup.deduplicate_enabled_for?(record, attachment_name)
136
+ end
137
+ end
138
+ end
139
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveStorageDedup
4
+ module ChangesExtension
5
+ extend ActiveSupport::Concern
6
+
7
+ # Patch Changes::CreateOne#find_or_build_blob to pass context
8
+ def find_or_build_blob
9
+ Rails.logger.debug "[ActiveStorageDedup] ChangesExtension#find_or_build_blob called for #{name} attachment"
10
+ Rails.logger.debug "[ActiveStorageDedup] Attachable type: #{attachable.class.name}"
11
+
12
+ case attachable
13
+ when ActiveStorage::Blob
14
+ attachable
15
+ when ActionDispatch::Http::UploadedFile
16
+ ActiveStorage::Blob.build_after_unfurling(
17
+ io: attachable.open,
18
+ filename: attachable.original_filename,
19
+ content_type: attachable.content_type,
20
+ __dedup_record: record,
21
+ __dedup_attachment_name: name,
22
+ service_name: attachment_service_name
23
+ )
24
+ when Rack::Test::UploadedFile
25
+ ActiveStorage::Blob.build_after_unfurling(
26
+ io: attachable.respond_to?(:open) ? attachable.open : attachable,
27
+ filename: attachable.original_filename,
28
+ content_type: attachable.content_type,
29
+ __dedup_record: record,
30
+ __dedup_attachment_name: name,
31
+ service_name: attachment_service_name
32
+ )
33
+ when Hash
34
+ ActiveStorage::Blob.build_after_unfurling(
35
+ **attachable.reverse_merge(
36
+ record: record,
37
+ service_name: attachment_service_name
38
+ ).symbolize_keys.merge(
39
+ __dedup_record: record,
40
+ __dedup_attachment_name: name
41
+ )
42
+ )
43
+ when String
44
+ ActiveStorage::Blob.find_signed!(attachable, record: record)
45
+ when File
46
+ ActiveStorage::Blob.build_after_unfurling(
47
+ io: attachable,
48
+ filename: File.basename(attachable),
49
+ __dedup_record: record,
50
+ __dedup_attachment_name: name,
51
+ service_name: attachment_service_name
52
+ )
53
+ when Pathname
54
+ ActiveStorage::Blob.build_after_unfurling(
55
+ io: attachable.open,
56
+ filename: File.basename(attachable),
57
+ __dedup_record: record,
58
+ __dedup_attachment_name: name,
59
+ service_name: attachment_service_name
60
+ )
61
+ else
62
+ raise(
63
+ ArgumentError,
64
+ "Could not find or build blob: expected attachable, " \
65
+ "got #{attachable.inspect}"
66
+ )
67
+ end
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveStorageDedup
4
+ class Configuration
5
+ # Master switch to enable/disable the entire gem (default: true)
6
+ # If false, no deduplication or lifecycle management will occur at all
7
+ attr_accessor :enabled
8
+
9
+ # Default deduplication setting for attachments when gem is enabled (default: true)
10
+ # This can be overridden per-attachment using the deduplicate: option
11
+ # Only applies when enabled = true
12
+ attr_accessor :deduplicate_by_default
13
+
14
+ # Automatically purge orphaned blobs when reference_count reaches 0 (default: true)
15
+ # Only applies when enabled = true
16
+ attr_accessor :auto_purge_orphans
17
+
18
+ def initialize
19
+ @enabled = true
20
+ @deduplicate_by_default = true
21
+ @auto_purge_orphans = true
22
+ Rails.logger.debug "[ActiveStorageDedup] Configuration initialized with defaults: enabled=#{@enabled}, deduplicate_by_default=#{@deduplicate_by_default}, auto_purge_orphans=#{@auto_purge_orphans}" if defined?(Rails)
23
+ end
24
+ end
25
+
26
+ class << self
27
+ attr_writer :configuration
28
+
29
+ def configuration
30
+ @configuration ||= Configuration.new
31
+ end
32
+
33
+ def configure
34
+ Rails.logger.debug "[ActiveStorageDedup] Configuring ActiveStorageDedup..." if defined?(Rails)
35
+ yield(configuration)
36
+ Rails.logger.info "[ActiveStorageDedup] Configuration updated: enabled=#{configuration.enabled}, deduplicate_by_default=#{configuration.deduplicate_by_default}, auto_purge_orphans=#{configuration.auto_purge_orphans}" if defined?(Rails)
37
+ end
38
+
39
+ def enabled?
40
+ configuration.enabled
41
+ end
42
+
43
+ # Track which attachments have deduplicate disabled
44
+ def attachment_settings
45
+ @attachment_settings ||= {}
46
+ end
47
+
48
+ def register_attachment(model_name, attachment_name, deduplicate:)
49
+ key = "#{model_name}##{attachment_name}"
50
+ attachment_settings[key] = { deduplicate: deduplicate }
51
+ Rails.logger.debug "[ActiveStorageDedup] Registered attachment #{key} with deduplicate=#{deduplicate}" if defined?(Rails)
52
+ end
53
+
54
+ def deduplicate_enabled_for?(record, attachment_name)
55
+ # First check: Is the gem enabled at all?
56
+ unless configuration.enabled
57
+ Rails.logger.debug "[ActiveStorageDedup] Gem is disabled globally (enabled=false)" if defined?(Rails)
58
+ return false
59
+ end
60
+
61
+ key = "#{record.class.name}##{attachment_name}"
62
+ settings = attachment_settings[key]
63
+
64
+ # Second check: Model-level setting takes precedence over global default
65
+ # If model explicitly sets deduplicate: true/false, use that
66
+ # Otherwise, fall back to configuration.deduplicate_by_default
67
+ if settings.nil?
68
+ result = configuration.deduplicate_by_default
69
+ Rails.logger.debug "[ActiveStorageDedup] Deduplication check for #{key}: #{result} (using deduplicate_by_default)" if defined?(Rails)
70
+ else
71
+ result = settings[:deduplicate]
72
+ Rails.logger.debug "[ActiveStorageDedup] Deduplication check for #{key}: #{result} (model-level override)" if defined?(Rails)
73
+ end
74
+
75
+ result
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveStorageDedup
4
+   class DeduplicationJob < ActiveJob::Base
5
+     queue_as :default
6
+
7
+     # Sanity check job to find and merge duplicate blobs across the entire database
8
+     # Can be run on-demand or scheduled (daily/weekly) to clean up any duplicates
9
+     # that may have slipped through due to race conditions
10
+     #
11
+     # @example Run manually
12
+     #   ActiveStorageDedup::DeduplicationJob.perform_now
13
+     #
14
+     # @example Schedule with whenever gem
15
+     #   every 1.day, at: '2:00 am' do
16
+     #     runner "ActiveStorageDedup::DeduplicationJob.perform_later"
17
+     #   end
18
+     #
19
+     # @example Schedule with sidekiq-cron
20
+     #   Sidekiq::Cron::Job.create(
21
+     #     name: "ActiveStorageDedup sanity check",
22
+     #     cron: "0 2 * * *",
23
+     #     class: "ActiveStorageDedup::DeduplicationJob"
24
+     #   )
25
+     def perform
26
+       Rails.logger.info "[ActiveStorageDedup] 🔍 Starting sanity check - scanning for duplicate blobs..."
27
+
28
+       # Blobs without a checksum cannot be compared, so they are never grouped.
29
+       # No select is needed: a grouped count already returns { [checksum, service_name] => n }.
30
+       duplicate_groups = ActiveStorage::Blob
31
+         .where.not(checksum: nil)
32
+         .group(:checksum, :service_name)
33
+         .having("COUNT(*) > 1")
34
+         .count
35
+
36
+       if duplicate_groups.empty?
37
+         Rails.logger.info "[ActiveStorageDedup] ✓ No duplicate blobs found - database is clean!"
38
+         return
39
+       end
40
+
41
+       Rails.logger.info "[ActiveStorageDedup] Found #{duplicate_groups.size} group(s) with duplicates"
42
+
43
+       total_merged = duplicate_groups.keys.sum do |checksum, service_name|
44
+         process_duplicate_group(checksum, service_name)
45
+       end
46
+
47
+       Rails.logger.info "[ActiveStorageDedup] ✓ Sanity check complete - merged #{total_merged} duplicate blob(s)"
48
+     end
49
+
50
+     private
51
+
52
+     # Merges every blob in one checksum/service group into the oldest one.
53
+     #
54
+     # @return [Integer] number of duplicates merged successfully
55
+     def process_duplicate_group(checksum, service_name)
56
+       Rails.logger.debug "[ActiveStorageDedup] Processing duplicate group: checksum=#{checksum[0..12]}..., service=#{service_name}"
57
+
58
+       duplicate_blobs = ActiveStorage::Blob
59
+         .where(checksum: checksum, service_name: service_name)
60
+         .order(:created_at, :id) # id breaks ties between equal timestamps
61
+         .to_a
62
+
63
+       Rails.logger.debug "[ActiveStorageDedup] Found #{duplicate_blobs.size} blob(s) with checksum #{checksum[0..12]}..."
64
+
65
+       # Keep the oldest blob. The group can have shrunk since the scan, in which
66
+       # case there is nothing left to merge.
67
+       keeper, *duplicates = duplicate_blobs
68
+       return 0 if duplicates.empty?
69
+
70
+       Rails.logger.info "[ActiveStorageDedup] 🔄 Merging #{duplicates.size} duplicate(s) into blob #{keeper.id} (checksum: #{checksum[0..12]}...)"
71
+
72
+       # Count only the merges that went through; failures are logged and skipped.
73
+       duplicates.count { |duplicate_blob| merge_duplicate(keeper, duplicate_blob) }
74
+     end
75
+
76
+     # Moves duplicate's attachments onto keeper, then removes duplicate's row and
77
+     # its stored file.
78
+     #
79
+     # The database changes run in one transaction so a failure cannot leave
80
+     # attachments moved without the counter updated. The stored file is removed
81
+     # only after that transaction has gone through.
82
+     #
83
+     # NOTE(review): keeper's file is assumed to exist in storage; confirm for
84
+     # blobs whose upload may never have completed.
85
+     #
86
+     # @return [Boolean] true when merged, false when an error was logged
87
+     def merge_duplicate(keeper, duplicate)
88
+       Rails.logger.debug "[ActiveStorageDedup] Merging blob #{duplicate.id} into keeper #{keeper.id}..."
89
+
90
+       moved = 0
91
+       ActiveStorage::Blob.transaction do
92
+         # update_all reports how many rows it changed, which is exactly what the
93
+         # keeper's counter must grow by (the counter cache is not touched by update_all).
94
+         moved = duplicate.attachments.update_all(blob_id: keeper.id)
95
+         Rails.logger.debug "[ActiveStorageDedup] Moving #{moved} attachment(s) from blob #{duplicate.id} to #{keeper.id}"
96
+
97
+         keeper.increment!(:reference_count, moved)
98
+         Rails.logger.debug "[ActiveStorageDedup] Updated keeper #{keeper.id} reference_count to #{keeper.reference_count}"
99
+
100
+         duplicate.destroy!
101
+         Rails.logger.debug "[ActiveStorageDedup] Deleted duplicate blob #{duplicate.id} record"
102
+       end
103
+
104
+       # ActiveStorage::Blob#delete removes the stored file, not the database row.
105
+       # The duplicate has its own key, so its file would otherwise be left orphaned.
106
+       duplicate.delete
107
+
108
+       Rails.logger.info "[ActiveStorageDedup] ✓ Merged blob #{duplicate.id} (#{moved} attachment(s)) into #{keeper.id}"
109
+       true
110
+     rescue => e
111
+       Rails.logger.error "[ActiveStorageDedup] ✗ Error merging blob #{duplicate.id}: #{e.class.name} - #{e.message}"
112
+       Rails.logger.debug "[ActiveStorageDedup] Error backtrace: #{e.backtrace.first(5).join("\n")}"
113
+       # Don't raise - allow job to complete for other duplicates
114
+       false
115
+     end
116
+   end
117
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveStorageDedup
4
+ class Railtie < ::Rails::Railtie
5
+ # Use after_initialize for reliable patching
6
+ config.after_initialize do
7
+ Rails.logger.info "[ActiveStorageDedup] Initializing ActiveStorageDedup gem..."
8
+
9
+ # Patch ActiveStorage::Blob with deduplication hooks
10
+ ActiveSupport.on_load(:active_storage_blob) do
11
+ Rails.logger.debug "[ActiveStorageDedup] Patching ActiveStorage::Blob with BlobDeduplication module"
12
+ # Prepend deduplication module to singleton class (for class methods)
13
+ # Using prepend allows us to use 'super' to call the original methods
14
+ ActiveStorage::Blob.singleton_class.prepend(
15
+ ActiveStorageDedup::BlobDeduplication::ClassMethods
16
+ )
17
+ Rails.logger.debug "[ActiveStorageDedup] ✓ ActiveStorage::Blob patched successfully"
18
+ end
19
+
20
+ # Patch ActiveStorage::Attachment with counter cache and lifecycle
21
+ ActiveSupport.on_load(:active_storage_attachment) do
22
+ Rails.logger.debug "[ActiveStorageDedup] Patching ActiveStorage::Attachment with AttachmentExtension module"
23
+ include ActiveStorageDedup::AttachmentExtension
24
+ Rails.logger.debug "[ActiveStorageDedup] ✓ ActiveStorage::Attachment patched successfully"
25
+ end
26
+
27
+ # Patch Changes::CreateOne to pass context (Rails 6.0+)
28
+ if defined?(ActiveStorage::Attached::Changes::CreateOne)
29
+ Rails.logger.debug "[ActiveStorageDedup] Patching ActiveStorage::Attached::Changes::CreateOne with ChangesExtension module"
30
+ ActiveStorage::Attached::Changes::CreateOne.prepend(
31
+ ActiveStorageDedup::ChangesExtension
32
+ )
33
+ Rails.logger.debug "[ActiveStorageDedup] ✓ ActiveStorage::Attached::Changes::CreateOne patched successfully"
34
+ else
35
+ Rails.logger.debug "[ActiveStorageDedup] ActiveStorage::Attached::Changes::CreateOne not found, skipping patch"
36
+ end
37
+
38
+ # Extend ActiveRecord with has_attached options
39
+ ActiveSupport.on_load(:active_record) do
40
+ Rails.logger.debug "[ActiveStorageDedup] Extending ActiveRecord with AttachmentOptions module"
41
+ extend ActiveStorageDedup::AttachmentOptions
42
+ Rails.logger.debug "[ActiveStorageDedup] ✓ ActiveRecord extended successfully"
43
+ end
44
+
45
+ Rails.logger.info "[ActiveStorageDedup] ✓ Gem initialization complete"
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ActiveStorageDedup
4
+ VERSION = "1.0.0.alpha"
5
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "active_storage_dedup/version"
4
+ require_relative "active_storage_dedup/configuration"
5
+ require_relative "active_storage_dedup/blob_deduplication"
6
+ require_relative "active_storage_dedup/changes_extension"
7
+ require_relative "active_storage_dedup/attachment_options"
8
+ require_relative "active_storage_dedup/attachment_extension"
9
+
10
+ module ActiveStorageDedup
11
+ # DeduplicationJob is autoloaded: its file is loaded on first reference to the constant, not at require time
12
+ autoload :DeduplicationJob, "active_storage_dedup/deduplication_job"
13
+ end
14
+
15
+ # Load Railtie for Rails integration
16
+ require_relative "active_storage_dedup/railtie" if defined?(Rails::Railtie)
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rails/generators"
4
+ require "rails/generators/migration"
5
+
6
+ module ActiveStorageDedup
7
+ module Generators
8
+ class InstallGenerator < ::Rails::Generators::Base
9
+ include Rails::Generators::Migration
10
+
11
+ source_root File.expand_path("templates", __dir__)
12
+
13
+ desc "Creates ActiveStorageDedup initializer and migration files"
14
+
15
+ def self.next_migration_number(path)
16
+ next_migration_number = current_migration_number(path) + 1
17
+ ActiveRecord::Migration.next_migration_number(next_migration_number)
18
+ end
19
+
20
+ def copy_initializer
21
+ template "initializer.rb", "config/initializers/active_storage_dedup.rb"
22
+ end
23
+
24
+ def copy_migrations
25
+ migration_template(
26
+ "add_active_storage_dedup.rb.erb",
27
+ "db/migrate/add_active_storage_dedup.rb"
28
+ )
29
+ end
30
+
31
+ def show_readme
32
+ readme "README" if behavior == :invoke
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,44 @@
1
+ ===============================================================================
2
+
3
+ ActiveStorageDedup has been installed!
4
+
5
+ Next steps:
6
+
7
+ 1. Run the migration:
8
+ rails db:migrate
9
+
10
+ 2. Review the configuration in config/initializers/active_storage_dedup.rb
11
+ The initializer has been created with sensible defaults.
12
+
13
+ 3. Optional: Control deduplication per-attachment:
14
+
15
+ class Product < ApplicationRecord
16
+ has_many_attached :images # Uses config.deduplicate_by_default
17
+ has_one_attached :unique_badge, deduplicate: false # Override: don't deduplicate
18
+ end
19
+
20
+ 4. Optional: Backfill reference_count for existing blobs:
21
+ rails active_storage_dedup:backfill_reference_count
22
+
23
+ 5. Optional: Schedule sanity check job (recommended weekly/monthly):
24
+
25
+ # Run manually
26
+ ActiveStorageDedup::DeduplicationJob.perform_now
27
+
28
+ # Or via rake task
29
+ rails active_storage_dedup:cleanup_all
30
+
31
+ # Or schedule with the whenever gem (config/schedule.rb)
32
+ every 1.week do
33
+ runner "ActiveStorageDedup::DeduplicationJob.perform_later"
34
+ end
35
+
36
+ Available rake tasks:
37
+ - rails active_storage_dedup:report_duplicates
38
+ - rails active_storage_dedup:cleanup_all
39
+ - rails active_storage_dedup:backfill_reference_count
40
+
41
+ For more information, visit:
42
+ https://github.com/yourusername/active_storage_dedup
43
+
44
+ ===============================================================================
@@ -0,0 +1,41 @@
1
+ class AddActiveStorageDedup < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def up
3
+ # Add reference_count column to active_storage_blobs
4
+ unless column_exists?(:active_storage_blobs, :reference_count)
5
+ add_column :active_storage_blobs, :reference_count, :integer, default: 0, null: false
6
+ end
7
+
8
+ # Add composite index on checksum and service_name for deduplication lookups
9
+ unless index_exists?(:active_storage_blobs, [:checksum, :service_name])
10
+ add_index :active_storage_blobs, [:checksum, :service_name], name: "index_active_storage_blobs_on_checksum_and_service"
11
+ end
12
+
13
+ # Backfill reference_count with actual attachment counts
14
+ # Using SQL for efficiency with large datasets
15
+ execute <<-SQL.squish
16
+ UPDATE active_storage_blobs
17
+ SET reference_count = (
18
+ SELECT COUNT(*)
19
+ FROM active_storage_attachments
20
+ WHERE active_storage_attachments.blob_id = active_storage_blobs.id
21
+ )
22
+ SQL
23
+
24
+ puts "ActiveStorageDedup migration complete!"
25
+ puts " - Added reference_count column to active_storage_blobs"
26
+ puts " - Added composite index on [checksum, service_name]"
27
+ puts " - Backfilled reference_count for #{ActiveStorage::Blob.count} existing blobs"
28
+ end
29
+
30
+ def down
31
+ # Remove index
32
+ if index_exists?(:active_storage_blobs, [:checksum, :service_name])
33
+ remove_index :active_storage_blobs, name: "index_active_storage_blobs_on_checksum_and_service"
34
+ end
35
+
36
+ # Remove column
37
+ if column_exists?(:active_storage_blobs, :reference_count)
38
+ remove_column :active_storage_blobs, :reference_count
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ ActiveStorageDedup.configure do |config|
4
+ # Master switch for the whole gem.
5
+ # When false, no deduplication or lifecycle management takes place at all.
6
+ # Default: true
7
+ config.enabled = true
8
+
9
+ # Default deduplication behaviour for attachments that do not set deduplicate: themselves.
10
+ # Only consulted when config.enabled is true.
11
+ # Override per attachment, e.g.: has_many_attached :images, deduplicate: false
12
+ # Default: true
13
+ config.deduplicate_by_default = true
14
+
15
+ # Purge a blob automatically once its reference_count reaches 0,
16
+ # i.e. when no attachment references it any more. Only applies when config.enabled is true.
17
+ # Default: true
18
+ config.auto_purge_orphans = true
19
+ end
20
+
21
+ # Usage Examples:
22
+ #
23
+ # Opt-out pattern (deduplicate everything except specific attachments):
24
+ # config.enabled = true
25
+ # config.deduplicate_by_default = true
26
+ #
27
+ # class Product < ApplicationRecord
28
+ # has_many_attached :images # Deduplicates (uses default)
29
+ # has_one_attached :unique_badge, deduplicate: false # Does NOT deduplicate (override)
30
+ # end
31
+ #
32
+ # Opt-in pattern (only deduplicate specific attachments):
33
+ # config.enabled = true
34
+ # config.deduplicate_by_default = false
35
+ #
36
+ # class Product < ApplicationRecord
37
+ # has_many_attached :images, deduplicate: true # Deduplicates (override)
38
+ # has_one_attached :avatar # Does NOT deduplicate (uses default)
39
+ # end
40
+ #
41
+ # Disable gem entirely (useful for development/testing):
42
+ # config.enabled = false