archive_storage 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +348 -0
  3. data/archive_storage.gemspec +42 -0
  4. data/lib/archive_storage/adapters/filesystem.rb +135 -0
  5. data/lib/archive_storage/adapters/memory.rb +101 -0
  6. data/lib/archive_storage/adapters/metadata.rb +23 -0
  7. data/lib/archive_storage/adapters/s3.rb +186 -0
  8. data/lib/archive_storage/configuration.rb +115 -0
  9. data/lib/archive_storage/duration_parser.rb +26 -0
  10. data/lib/archive_storage/enqueuer.rb +29 -0
  11. data/lib/archive_storage/errors.rb +9 -0
  12. data/lib/archive_storage/jobs/migration_job.rb +28 -0
  13. data/lib/archive_storage/jobs/queue_job.rb +65 -0
  14. data/lib/archive_storage/jobs/sidekiq_migration_worker.rb +28 -0
  15. data/lib/archive_storage/jobs/sidekiq_queue_worker.rb +65 -0
  16. data/lib/archive_storage/migration_rate.rb +16 -0
  17. data/lib/archive_storage/migrator.rb +151 -0
  18. data/lib/archive_storage/model.rb +35 -0
  19. data/lib/archive_storage/models/file_record.rb +26 -0
  20. data/lib/archive_storage/mount_config.rb +50 -0
  21. data/lib/archive_storage/plan_result.rb +61 -0
  22. data/lib/archive_storage/planner.rb +190 -0
  23. data/lib/archive_storage/policy.rb +48 -0
  24. data/lib/archive_storage/policy_builder.rb +72 -0
  25. data/lib/archive_storage/railtie.rb +23 -0
  26. data/lib/archive_storage/registry.rb +109 -0
  27. data/lib/archive_storage/schedule_config.rb +79 -0
  28. data/lib/archive_storage/scheduler.rb +93 -0
  29. data/lib/archive_storage/storage.rb +91 -0
  30. data/lib/archive_storage/storage_config.rb +37 -0
  31. data/lib/archive_storage/storage_rule.rb +57 -0
  32. data/lib/archive_storage/stored_file.rb +94 -0
  33. data/lib/archive_storage/tasks.rake +82 -0
  34. data/lib/archive_storage/verification_result.rb +11 -0
  35. data/lib/archive_storage/verifier.rb +144 -0
  36. data/lib/archive_storage/version.rb +5 -0
  37. data/lib/archive_storage.rb +148 -0
  38. data/lib/generators/archive_storage/install_generator.rb +28 -0
  39. data/lib/generators/archive_storage/templates/create_archive_storage_files.rb +53 -0
  40. metadata +227 -0
@@ -0,0 +1,186 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "cgi"
4
+ require "stringio"
5
+ require "tempfile"
6
+ require_relative "../errors"
7
+ require_relative "metadata"
8
+
9
+ module ArchiveStorage
10
+ module Adapters
11
# Storage adapter for S3-compatible object stores, backed by the aws-sdk-s3
# gem. The gem is required lazily, the first time a client, a presigner or the
# list of "not found" errors is needed.
class S3
  attr_reader :config

  # @param config [Object] storage settings; this class reads +bucket+,
  #   +access_key_id+, +secret_access_key+, +region+, +endpoint+,
  #   +path_style?+, +options+, +public?+ and +public_host+ from it.
  def initialize(config)
    @config = config
  end

  # Uploads +file+ under +key+.
  #
  # @param key [String] object key
  # @param file [Object] something with a +path+, a +to_file+, a +read+, or
  #   anything else (sent as its +to_s+)
  # @param content_type [String, nil] falls back to +file.content_type+ when
  #   the file exposes one
  def upload(key, file, content_type: nil)
    body, close_body = upload_body(file)

    client.put_object(
      bucket: config.bucket,
      key: key,
      body: body,
      content_type: content_type || detect_content_type(file)
    )
  ensure
    # Only close handles this adapter opened itself.
    body.close if close_body && body.respond_to?(:close)
  end

  # Uploads the file stored at local +path+ under +key+.
  def upload_path(key, path, content_type: nil)
    ::File.open(path, "rb") do |file|
      client.put_object(
        bucket: config.bucket,
        key: key,
        body: file,
        content_type: content_type
      )
    end
  end

  # Returns the full object body.
  #
  # @raise [NotFoundError] when the client raises one of #not_found_errors
  def read(key)
    client.get_object(bucket: config.bucket, key: key).body.read
  rescue *not_found_errors => error
    # The splat is required: `rescue` only accepts classes/modules, and
    # not_found_errors returns an Array.
    raise NotFoundError, error.message
  end

  # Streams the object into the local file at +path+.
  # The destination is opened for writing before the request is issued, so an
  # empty file may be left behind when the key does not exist.
  #
  # @raise [NotFoundError] when the client raises one of #not_found_errors
  def download_to(key, path)
    ::File.open(path, "wb") do |file|
      client.get_object(bucket: config.bucket, key: key) do |chunk|
        file.write(chunk)
      end
    end
  rescue *not_found_errors => error
    raise NotFoundError, error.message
  end

  # Copies +source_key+ from another adapter into this store as +target_key+,
  # going through a temporary local file and reusing the source content type.
  def copy_from(source_adapter, source_key, target_key)
    source_metadata = source_adapter.head(source_key)

    Tempfile.create("archive-storage-copy") do |tempfile|
      tempfile.binmode
      source_adapter.download_to(source_key, tempfile.path)
      upload_path(target_key, tempfile.path, content_type: source_metadata.content_type)
    end
  end

  # Fetches object metadata without downloading the body.
  #
  # @return [Metadata]
  # @raise [NotFoundError] when the client raises one of #not_found_errors
  def head(key)
    response = client.head_object(bucket: config.bucket, key: key)

    Metadata.new(
      byte_size: response.content_length,
      content_type: response.content_type,
      etag: clean_etag(response.etag),
      checksum: response_checksum(response),
      checksum_algorithm: response_checksum_algorithm(response),
      metadata: response.metadata || {}
    )
  rescue *not_found_errors => error
    raise NotFoundError, error.message
  end

  # @return [Boolean] whether #head succeeds for +key+
  def exists?(key)
    head(key)
    true
  rescue NotFoundError
    false
  end

  # Deletes the object and returns true.
  def delete(key)
    client.delete_object(bucket: config.bucket, key: key)
    true
  end

  # Returns a public URL when +public+ is truthy or the storage is configured
  # as public, otherwise a presigned GET URL.
  # Note that an explicit `public: false` does not override a storage whose
  # config reports `public?`.
  #
  # @param expires_in [Integer] forwarded to the presigner
  def url(key, expires_in: 3600, public: nil, **_options)
    if public || config.public?
      public_url(key)
    else
      presigner.presigned_url(
        :get_object,
        bucket: config.bucket,
        key: key,
        expires_in: expires_in
      )
    end
  end

  # Memoized Aws::S3::Client built from #client_options.
  def client
    @client ||= begin
      require "aws-sdk-s3"

      Aws::S3::Client.new(client_options)
    end
  end

  private

  # Explicit settings (nil entries dropped) overlaid with config.options.
  def client_options
    {
      access_key_id: config.access_key_id,
      secret_access_key: config.secret_access_key,
      region: config.region,
      endpoint: config.endpoint,
      force_path_style: config.path_style?
    }.compact.merge(config.options || {})
  end

  def presigner
    require "aws-sdk-s3"
    Aws::S3::Presigner.new(client: client)
  end

  # Base is config.public_host, or "<endpoint>/<bucket>" when no host is set.
  def public_url(key)
    host = config.public_host || "#{config.endpoint}/#{config.bucket}"
    "#{host.to_s.delete_suffix("/")}/#{escape_key(key)}"
  end

  # Picks the request body for +file+.
  #
  # @return [Array(Object, Boolean)] the body and whether #upload must close it
  def upload_body(file)
    return [::File.open(file.path, "rb"), true] if file.respond_to?(:path) && file.path

    # Call to_file exactly once: a second call could create another handle
    # that nobody closes.
    converted = file.to_file if file.respond_to?(:to_file)
    return [converted, false] if converted
    return [file, false] if file.respond_to?(:read)

    [StringIO.new(file.to_s), true]
  end

  def detect_content_type(file)
    file.content_type if file.respond_to?(:content_type)
  end

  # Strips the surrounding double quotes from an ETag header value.
  def clean_etag(etag)
    etag&.delete_prefix("\"")&.delete_suffix("\"")
  end

  # Value of the first checksum field present on +response+, or nil.
  def response_checksum(response)
    checksum_algorithm = response_checksum_algorithm(response)
    return nil unless checksum_algorithm

    response.public_send("checksum_#{checksum_algorithm}") if response.respond_to?("checksum_#{checksum_algorithm}")
  end

  # Name of the first algorithm (in preference order) for which the response
  # carries a checksum, or nil.
  def response_checksum_algorithm(response)
    %w[sha256 sha1 crc32c crc32].find do |algorithm|
      response.respond_to?("checksum_#{algorithm}") &&
        response.public_send("checksum_#{algorithm}")
    end
  end

  # Percent-encodes each path segment of +key+ while keeping "/" separators.
  # CGI.escape emits "+" for spaces (form encoding), which is a literal plus
  # inside a URL path, so it is rewritten to "%20"; a real "+" is already
  # "%2B" at that point. The -1 limit keeps a trailing "/" intact.
  def escape_key(key)
    key.to_s.split("/", -1).map { |part| CGI.escape(part).gsub("+", "%20") }.join("/")
  end

  # Exception classes that are translated into NotFoundError.
  def not_found_errors
    require "aws-sdk-s3"
    [
      Aws::S3::Errors::NoSuchKey,
      Aws::S3::Errors::NotFound,
      Aws::S3::Errors::NoSuchBucket
    ]
  end
end
185
+ end
186
+ end
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "storage_config"
4
+ require_relative "mount_config"
5
+ require_relative "schedule_config"
6
+ require_relative "errors"
7
+
8
+ module ArchiveStorage
9
# Central, mutable settings object: named storages, model mounts, schedules
# and job/verification defaults. Adapters are built lazily and cached per
# storage name.
class Configuration
  attr_accessor :migration_queue,
                :schedule_queue,
                :job_backend,
                :verification_strategy,
                :default_batch_size,
                :default_cleanup_delay,
                :enqueue_claim_ttl,
                :delete_source_enabled,
                :registry_class_name,
                :fallback_on_read_errors

  attr_reader :verify_checksums
  attr_reader :storages, :mounts, :schedules

  def initialize
    @storages = {}
    @mounts = []
    @schedules = []
    @adapter_cache = {}
    @migration_queue = :default
    @schedule_queue = :default
    @job_backend = :active_job
    @verification_strategy = :auto
    @verify_checksums = false
    @default_batch_size = 500
    @default_cleanup_delay = 7 * 24 * 60 * 60 # 7 days in seconds
    @enqueue_claim_ttl = 6 * 60 * 60 # 6 hours in seconds
    @delete_source_enabled = false
    @registry_class_name = "ArchiveStorage::Models::FileRecord"
    @fallback_on_read_errors = [NotFoundError]
  end

  # Stores the flag; a truthy value also switches verification_strategy to
  # :checksum. A falsy value leaves the strategy untouched.
  def verify_checksums=(value)
    @verify_checksums = value
    @verification_strategy = :checksum if value
  end

  # Defines or reopens the storage called +name+, yields its StorageConfig to
  # the optional block and drops any adapter cached for it.
  #
  # @return [StorageConfig]
  def storage(name, &block)
    config = (@storages[name.to_sym] ||= StorageConfig.new(name))
    block.call(config) if block
    @adapter_cache.delete(name.to_sym)
    config
  end

  # @return [StorageConfig]
  # @raise [ConfigurationError] when no storage with that name is defined
  def storage!(name)
    @storages.fetch(name.to_sym) do
      raise ConfigurationError, "unknown archive storage #{name.inspect}"
    end
  end

  # Returns the (cached) adapter for the storage called +name+.
  #
  # @raise [ConfigurationError] for an unknown storage or provider
  def adapter(name)
    @adapter_cache[name.to_sym] ||= build_adapter(storage!(name))
  end

  # Registers a mount, replacing any existing one that matches the same
  # model/attribute pair.
  #
  # @return [MountConfig]
  def mount(model, mounted_as, uploader: nil, policy: nil)
    MountConfig.new(model, mounted_as, uploader: uploader, policy: policy).tap do |mount|
      @mounts.reject! { |existing| existing.matches_model?(model, mounted_as) }
      @mounts << mount
    end
  end

  # @return [MountConfig, nil]
  def find_mount(model, mounted_as)
    @mounts.find { |mount| mount.matches_model?(model, mounted_as) }
  end

  # Builds, validates and appends a schedule. +uploader+ is only used when
  # +uploaders+ is not given.
  #
  # @return [ScheduleConfig]
  def schedule(name, cron:, model: nil, mounted_as: nil, uploader: nil, uploaders: nil, migration_rate: nil)
    ScheduleConfig.new(
      name,
      cron: cron,
      model: model,
      mounted_as: mounted_as,
      uploaders: Array(uploaders || uploader),
      migration_rate: migration_rate
    ).tap do |schedule|
      schedule.validate!
      @schedules << schedule
    end
  end

  # Resolves registry_class_name to the constant it names, one namespace
  # segment at a time. A leading "::" (absolute constant notation) is
  # accepted; without stripping it the first segment would be empty and
  # const_get would raise NameError.
  def registry_class
    registry_class_name.to_s.delete_prefix("::").split("::").inject(Object) do |namespace, const_name|
      namespace.const_get(const_name)
    end
  end

  private

  # Returns the adapter preset on the storage config, or instantiates the
  # built-in adapter for its provider (required on demand).
  #
  # @raise [ConfigurationError] when the provider is missing or unsupported
  def build_adapter(config)
    return config.adapter if config.adapter

    # Safe navigation: a storage declared without a provider must surface as
    # a ConfigurationError, not a NoMethodError on nil.
    case config.provider&.to_sym
    when :s3
      require_relative "adapters/s3"
      Adapters::S3.new(config)
    when :memory
      require_relative "adapters/memory"
      Adapters::Memory.new(config)
    when :filesystem, :file, :nfs
      require_relative "adapters/filesystem"
      Adapters::FileSystem.new(config)
    else
      raise ConfigurationError, "unsupported storage provider #{config.provider.inspect}"
    end
  end
end
115
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ArchiveStorage
4
# Converts short duration strings such as "30d" or "6 mo" into seconds.
class DurationParser
  # Seconds per unit suffix. Months count as 30 days and years as 365 days.
  UNITS = {
    "s" => 1,
    "m" => 60,
    "h" => 60 * 60,
    "d" => 24 * 60 * 60,
    "w" => 7 * 24 * 60 * 60,
    "mo" => 30 * 24 * 60 * 60,
    "y" => 365 * 24 * 60 * 60
  }.freeze

  class << self
    # @param value [nil, Numeric, String, #to_i] duration to interpret
    # @return [nil] for nil or the empty string
    # @return [Numeric] numerics unchanged, +to_i+ of other non-string values
    #   that offer it, otherwise amount * unit in seconds
    # @raise [ArgumentError] when the text is not "<digits><unit>"
    def parse(value)
      return if value.nil? || value == ""
      return value if value.is_a?(Numeric)
      return value.to_i if !value.is_a?(String) && value.respond_to?(:to_i)

      amount, unit = value.to_s.strip.match(/\A(\d+)\s*(mo|[smhdwy])\z/i)&.captures
      raise ArgumentError, "invalid duration #{value.inspect}" unless amount

      amount.to_i * UNITS.fetch(unit.downcase)
    end
  end
end
26
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ArchiveStorage
4
# Hands a single file-record migration to the configured job backend.
class Enqueuer
  # @param backend [#to_sym] one of :inline, :active_job, :good_job, :sidekiq;
  #   defaults to the configured job_backend
  def initialize(backend: ArchiveStorage.configuration.job_backend)
    @backend = backend.to_sym
  end

  # Runs the migration inline or enqueues it, depending on the backend. The
  # backend-specific file is required only when that backend is used.
  #
  # @param file_record_id [Object] id of the registry record to migrate
  # @raise [ConfigurationError] when the backend is not recognised
  def enqueue_migration(file_record_id)
    if backend == :inline
      migrate_inline(file_record_id)
    elsif %i[active_job good_job].include?(backend)
      enqueue_with_active_job(file_record_id)
    elsif backend == :sidekiq
      enqueue_with_sidekiq(file_record_id)
    else
      raise ConfigurationError, "unknown job backend #{backend.inspect}"
    end
  end

  private

  attr_reader :backend

  # Looks the record up with +find+ and migrates it in the calling process.
  def migrate_inline(file_record_id)
    require_relative "migrator"
    ArchiveStorage::Migrator.new.migrate_record!(ArchiveStorage.configuration.registry_class.find(file_record_id))
  end

  def enqueue_with_active_job(file_record_id)
    require_relative "jobs/migration_job"
    Jobs::MigrationJob.perform_later(file_record_id)
  end

  def enqueue_with_sidekiq(file_record_id)
    require_relative "jobs/sidekiq_migration_worker"
    Jobs::SidekiqMigrationWorker.perform_async(file_record_id)
  end
end
29
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module ArchiveStorage
  # Root of the gem's error hierarchy; rescue this to catch every error below.
  class Error < StandardError; end

  class ConfigurationError < Error; end

  class NotFoundError < Error; end

  class VerificationError < Error; end

  class RegistryUnavailableError < Error; end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "active_job"
5
+ rescue LoadError
6
+ # ActiveJob is available in Rails apps.
7
+ end
8
+
9
+ require_relative "../migrator"
10
+
11
module ArchiveStorage
  module Jobs
    # The job class is only defined once ActiveJob has been loaded.
    if defined?(::ActiveJob::Base)
      # ActiveJob wrapper that migrates one registry record.
      class MigrationJob < ::ActiveJob::Base
        # The queue name is read from the configuration through a block
        # rather than fixed as a literal.
        queue_as { ArchiveStorage.configuration.migration_queue }

        # Migrates the record with the given id; does nothing (returns nil)
        # when the registry no longer has it.
        def perform(file_record_id)
          file_record = ArchiveStorage.configuration.registry_class.find_by(id: file_record_id)
          Migrator.new.migrate_record!(file_record) if file_record
        end
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "active_job"
5
+ rescue LoadError
6
+ # ActiveJob is available in Rails apps.
7
+ end
8
+
9
+ require_relative "../errors"
10
+ require_relative "../migrator"
11
+ require_relative "../planner"
12
+
13
module ArchiveStorage
  module Jobs
    # The job class is only defined once ActiveJob has been loaded.
    if defined?(::ActiveJob::Base)
      # Scheduled job that plans migrations for a model mount or a list of
      # uploaders and enqueues/migrates them, optionally capped per run.
      class QueueJob < ::ActiveJob::Base
        queue_as { ArchiveStorage.configuration.schedule_queue }

        # @param options [Hash] string or symbol keys; reads :model,
        #   :mounted_as, :uploader, :uploaders, :migration_rate and :limit
        # @return [Integer] sum of the counts returned by each migrator run
        # @raise [ConfigurationError] when neither a model mount nor any
        #   uploader is given
        def perform(options = {})
          settings = symbolize_keys(options)
          budget = migration_limit(settings)
          migrated = 0

          planner_options(settings).each do |scope|
            # Stop as soon as the per-run cap (if any) is used up.
            break if budget && budget <= 0

            planner = Planner.new(**scope, limit: budget)
            batch = Migrator.new(planner: planner).enqueue_or_migrate!
            budget -= batch if budget
            migrated += batch
          end

          migrated
        end

        private

        def symbolize_keys(hash)
          hash.to_h.transform_keys { |key| key.to_sym }
        end

        # Flattened, nil-free list of uploaders; :uploaders wins over :uploader.
        def scheduled_uploaders(options)
          uploaders = Array(options[:uploaders] || options[:uploader]).flatten.compact
          raise ConfigurationError, "ArchiveStorage::Jobs::QueueJob requires uploader or uploaders" if uploaders.empty?

          uploaders
        end

        # One Planner argument hash per unit of work: a single model mount
        # when both :model and :mounted_as are present, otherwise one per
        # uploader.
        def planner_options(options)
          return [{ model: options[:model], mounted_as: options[:mounted_as] }] if options[:model] && options[:mounted_as]

          scheduled_uploaders(options).map { |uploader| { uploader: uploader } }
        end

        # Per-run cap as an Integer, or nil when neither option is set.
        def migration_limit(options)
          (options[:migration_rate] || options[:limit])&.to_i
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "sidekiq"
5
+ rescue LoadError
6
+ # Sidekiq is optional. This worker is only used when job_backend is :sidekiq.
7
+ end
8
+
9
+ require_relative "../migrator"
10
+
11
module ArchiveStorage
  module Jobs
    # The worker class is only defined once Sidekiq has been loaded.
    if defined?(::Sidekiq)
      # Sidekiq worker that migrates one registry record.
      class SidekiqMigrationWorker
        include ::Sidekiq::Worker

        # NOTE(review): this argument is evaluated while the class body runs,
        # so the queue reflects the configuration at load time — confirm that
        # configuration always happens before this file is required.
        sidekiq_options queue: ArchiveStorage.configuration.migration_queue.to_s

        # Migrates the record with the given id; does nothing (returns nil)
        # when the registry no longer has it.
        def perform(file_record_id)
          file_record = ArchiveStorage.configuration.registry_class.find_by(id: file_record_id)
          Migrator.new.migrate_record!(file_record) if file_record
        end
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "sidekiq"
5
+ rescue LoadError
6
+ # Sidekiq is optional. Require this file only in Sidekiq-backed apps.
7
+ end
8
+
9
+ require_relative "../errors"
10
+ require_relative "../migrator"
11
+ require_relative "../planner"
12
+
13
module ArchiveStorage
  module Jobs
    # The worker class is only defined once Sidekiq has been loaded.
    if defined?(::Sidekiq)
      # Scheduled Sidekiq worker that plans migrations for a model mount or a
      # list of uploaders and enqueues/migrates them, optionally capped per run.
      class SidekiqQueueWorker
        include ::Sidekiq::Worker

        # NOTE(review): this argument is evaluated while the class body runs,
        # so the queue reflects the configuration at load time — confirm that
        # configuration always happens before this file is required.
        sidekiq_options queue: ArchiveStorage.configuration.schedule_queue.to_s

        # @param options [Hash] string or symbol keys; reads :model,
        #   :mounted_as, :uploader, :uploaders, :migration_rate and :limit
        # @return [Integer] sum of the counts returned by each migrator run
        # @raise [ConfigurationError] when neither a model mount nor any
        #   uploader is given
        def perform(options = {})
          settings = symbolize_keys(options)
          budget = migration_limit(settings)
          migrated = 0

          planner_options(settings).each do |scope|
            # Stop as soon as the per-run cap (if any) is used up.
            break if budget && budget <= 0

            planner = Planner.new(**scope, limit: budget)
            batch = Migrator.new(planner: planner).enqueue_or_migrate!
            budget -= batch if budget
            migrated += batch
          end

          migrated
        end

        private

        def symbolize_keys(hash)
          hash.to_h.transform_keys { |key| key.to_sym }
        end

        # Flattened, nil-free list of uploaders; :uploaders wins over :uploader.
        def scheduled_uploaders(options)
          uploaders = Array(options[:uploaders] || options[:uploader]).flatten.compact
          raise ConfigurationError, "ArchiveStorage::Jobs::SidekiqQueueWorker requires uploader or uploaders" if uploaders.empty?

          uploaders
        end

        # One Planner argument hash per unit of work: a single model mount
        # when both :model and :mounted_as are present, otherwise one per
        # uploader.
        def planner_options(options)
          return [{ model: options[:model], mounted_as: options[:mounted_as] }] if options[:model] && options[:mounted_as]

          scheduled_uploaders(options).map { |uploader| { uploader: uploader } }
        end

        # Per-run cap as an Integer, or nil when neither option is set.
        def migration_limit(options)
          (options[:migration_rate] || options[:limit])&.to_i
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module ArchiveStorage
4
# Value object for the maximum number of files to migrate in one run.
class MigrationRate
  attr_reader :files

  # @param files [Integer, String, Numeric] anything Kernel#Integer accepts
  # @raise [ArgumentError] when the coerced value is zero or negative, or
  #   when Kernel#Integer rejects a malformed string
  # @raise [TypeError] when Kernel#Integer cannot convert the value (e.g. nil)
  def initialize(files)
    @files = Integer(files)
    # Validate the coerced integer, not the raw argument: the parameter
    # shadows the reader, so checking `files` would call positive? on the
    # caller's object (a String has none, and 0.5 would pass while @files is 0).
    raise ArgumentError, "migration rate must be greater than zero" unless @files.positive?
  end

  # @return [Integer] the per-run file cap
  def max_files_per_run
    files
  end
end
16
+ end