archive_storage 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +348 -0
- data/archive_storage.gemspec +42 -0
- data/lib/archive_storage/adapters/filesystem.rb +135 -0
- data/lib/archive_storage/adapters/memory.rb +101 -0
- data/lib/archive_storage/adapters/metadata.rb +23 -0
- data/lib/archive_storage/adapters/s3.rb +186 -0
- data/lib/archive_storage/configuration.rb +115 -0
- data/lib/archive_storage/duration_parser.rb +26 -0
- data/lib/archive_storage/enqueuer.rb +29 -0
- data/lib/archive_storage/errors.rb +9 -0
- data/lib/archive_storage/jobs/migration_job.rb +28 -0
- data/lib/archive_storage/jobs/queue_job.rb +65 -0
- data/lib/archive_storage/jobs/sidekiq_migration_worker.rb +28 -0
- data/lib/archive_storage/jobs/sidekiq_queue_worker.rb +65 -0
- data/lib/archive_storage/migration_rate.rb +16 -0
- data/lib/archive_storage/migrator.rb +151 -0
- data/lib/archive_storage/model.rb +35 -0
- data/lib/archive_storage/models/file_record.rb +26 -0
- data/lib/archive_storage/mount_config.rb +50 -0
- data/lib/archive_storage/plan_result.rb +61 -0
- data/lib/archive_storage/planner.rb +190 -0
- data/lib/archive_storage/policy.rb +48 -0
- data/lib/archive_storage/policy_builder.rb +72 -0
- data/lib/archive_storage/railtie.rb +23 -0
- data/lib/archive_storage/registry.rb +109 -0
- data/lib/archive_storage/schedule_config.rb +79 -0
- data/lib/archive_storage/scheduler.rb +93 -0
- data/lib/archive_storage/storage.rb +91 -0
- data/lib/archive_storage/storage_config.rb +37 -0
- data/lib/archive_storage/storage_rule.rb +57 -0
- data/lib/archive_storage/stored_file.rb +94 -0
- data/lib/archive_storage/tasks.rake +82 -0
- data/lib/archive_storage/verification_result.rb +11 -0
- data/lib/archive_storage/verifier.rb +144 -0
- data/lib/archive_storage/version.rb +5 -0
- data/lib/archive_storage.rb +148 -0
- data/lib/generators/archive_storage/install_generator.rb +28 -0
- data/lib/generators/archive_storage/templates/create_archive_storage_files.rb +53 -0
- metadata +227 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 32099fdbd406f02b06d31a562372c848c0d520aa6d4da0278793aecb4ed6cfe5
|
|
4
|
+
data.tar.gz: e28f3f328bcefdf82382b854a7dc7841367f9b6f42e83c5a65fb62082a6985a7
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 2db3a3be0b2d4300e53d00c6ae1e60f450e9201dd37efe62b2c703f1f50a075e212e9ac09265c1b1814a567b0b43ed4e142aa873eac32c20f3d240dab3860146
|
|
7
|
+
data.tar.gz: 3fd152d37cb0f5e4f4f0566f9c1b0577fc877cddf728abde9b6a35fcbce5fa0834c1c2dbfa0c686f431e8537126a7aef8fe20606aa2b88694108cedff895a431
|
data/README.md
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
# archive_storage
|
|
2
|
+
|
|
3
|
+
Zero-downtime archival storage for CarrierWave uploads.
|
|
4
|
+
|
|
5
|
+
`archive_storage` moves older uploaded files from one storage backend to another, keeps a registry of the current file location, and routes reads to the right backend. It currently integrates with CarrierWave; support for other uploader libraries can be added later without changing the registry model.
|
|
6
|
+
|
|
7
|
+
Supported storage adapters:
|
|
8
|
+
|
|
9
|
+
- S3-compatible object storage, including MinIO and AWS S3
|
|
10
|
+
- filesystem/NFS
|
|
11
|
+
- memory adapter for tests
|
|
12
|
+
|
|
13
|
+
Typical use cases:
|
|
14
|
+
|
|
15
|
+
- `main` S3/MinIO bucket -> `archive_001` cold bucket
|
|
16
|
+
- `archive_001` -> `archive_002` when the first archive fills up
|
|
17
|
+
- NFS/local disk -> S3-compatible archive storage
|
|
18
|
+
|
|
19
|
+
## Features
|
|
20
|
+
|
|
21
|
+
- model-first DSL: `archive_storage_for :file`
|
|
22
|
+
- automatic CarrierWave storage wiring
|
|
23
|
+
- ActiveRecord registry table: `archive_storage_files`
|
|
24
|
+
- dry-run planning
|
|
25
|
+
- scheduled enqueueing
|
|
26
|
+
- background migration jobs
|
|
27
|
+
- copy, verify, read switch, fallback read, delayed source cleanup
|
|
28
|
+
- optional CarrierWave versions/thumbs migration
|
|
29
|
+
- GoodJob, ActiveJob, Sidekiq, `sidekiq-cron`, and `sidekiq-scheduler` support
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
Add the gem:
|
|
34
|
+
|
|
35
|
+
```ruby
|
|
36
|
+
gem "archive_storage"
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
For S3-compatible storage:
|
|
40
|
+
|
|
41
|
+
```ruby
|
|
42
|
+
gem "aws-sdk-s3"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Install the registry table:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
bin/rails generate archive_storage:install
|
|
49
|
+
bin/rails db:migrate
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Configuration
|
|
53
|
+
|
|
54
|
+
Define the storage backends and scheduled archive jobs.
|
|
55
|
+
|
|
56
|
+
```ruby
|
|
57
|
+
# config/initializers/archive_storage.rb
|
|
58
|
+
|
|
59
|
+
ArchiveStorage.configure do |config|
|
|
60
|
+
config.storage :main do |s|
|
|
61
|
+
s.provider = :s3
|
|
62
|
+
s.endpoint = ENV.fetch("MAIN_STORAGE_ENDPOINT")
|
|
63
|
+
s.bucket = "production-main"
|
|
64
|
+
s.access_key_id = ENV.fetch("MAIN_STORAGE_ACCESS_KEY")
|
|
65
|
+
s.secret_access_key = ENV.fetch("MAIN_STORAGE_SECRET_KEY")
|
|
66
|
+
s.region = "us-east-1"
|
|
67
|
+
s.path_style = true
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
config.storage :archive_001 do |s|
|
|
71
|
+
s.provider = :s3
|
|
72
|
+
s.endpoint = ENV.fetch("ARCHIVE_001_ENDPOINT")
|
|
73
|
+
s.bucket = "production-archive-001"
|
|
74
|
+
s.access_key_id = ENV.fetch("ARCHIVE_001_ACCESS_KEY")
|
|
75
|
+
s.secret_access_key = ENV.fetch("ARCHIVE_001_SECRET_KEY")
|
|
76
|
+
s.region = "us-east-1"
|
|
77
|
+
s.path_style = true
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
config.storage :archive_002 do |s|
|
|
81
|
+
s.provider = :s3
|
|
82
|
+
s.endpoint = ENV.fetch("ARCHIVE_002_ENDPOINT")
|
|
83
|
+
s.bucket = "production-archive-002"
|
|
84
|
+
s.access_key_id = ENV.fetch("ARCHIVE_002_ACCESS_KEY")
|
|
85
|
+
s.secret_access_key = ENV.fetch("ARCHIVE_002_SECRET_KEY")
|
|
86
|
+
s.region = "us-east-1"
|
|
87
|
+
s.path_style = true
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
config.schedule :archive_documents,
|
|
91
|
+
cron: "0 0-6,22,23 * * 1-5",
|
|
92
|
+
model: "ProjectDocument",
|
|
93
|
+
mounted_as: :file,
|
|
94
|
+
migration_rate: 10_000
|
|
95
|
+
|
|
96
|
+
# Optional defaults:
|
|
97
|
+
#
|
|
98
|
+
# config.job_backend = :active_job # :active_job, :good_job, :sidekiq, or :inline
|
|
99
|
+
# config.migration_queue = :default
|
|
100
|
+
# config.schedule_queue = :default
|
|
101
|
+
# config.default_batch_size = 500
|
|
102
|
+
# config.verification_strategy = :auto
|
|
103
|
+
# config.delete_source_enabled = false
|
|
104
|
+
# config.default_cleanup_delay = 7.days
|
|
105
|
+
end
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Filesystem/NFS storage can be mixed with S3-compatible storage:
|
|
109
|
+
|
|
110
|
+
```ruby
|
|
111
|
+
config.storage :nfs_main do |s|
|
|
112
|
+
s.provider = :filesystem
|
|
113
|
+
s.root_path = "/mnt/uploads"
|
|
114
|
+
end
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Model Policy
|
|
118
|
+
|
|
119
|
+
Put archive policy next to the model that owns the file.
|
|
120
|
+
|
|
121
|
+
```ruby
|
|
122
|
+
class ProjectDocument < ApplicationRecord
|
|
123
|
+
mount_uploader :file, DocumentUploader
|
|
124
|
+
|
|
125
|
+
archive_storage_for :file do
|
|
126
|
+
primary :main
|
|
127
|
+
|
|
128
|
+
archive :archive_001,
|
|
129
|
+
after: 90.days,
|
|
130
|
+
scope: :ready_for_archive,
|
|
131
|
+
if: ->(record) { record.closed? }
|
|
132
|
+
|
|
133
|
+
archive :archive_002,
|
|
134
|
+
after: 2.years,
|
|
135
|
+
scope: ->(records) { records.where(priority: "low") },
|
|
136
|
+
if: ->(record) { record.closed? }
|
|
137
|
+
|
|
138
|
+
read_fallbacks :main, :archive_001, :archive_002
|
|
139
|
+
|
|
140
|
+
# Optional:
|
|
141
|
+
#
|
|
142
|
+
# delete_source_after verification: true, delay: 7.days
|
|
143
|
+
# include_versions true
|
|
144
|
+
# versions :thumb, :preview
|
|
145
|
+
# timestamp_attribute :created_at
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
`archive_storage_for` automatically wires the mounted CarrierWave uploader to `storage :archive_storage`. The uploader can stay focused on path, filename, and version behavior:
|
|
151
|
+
|
|
152
|
+
```ruby
|
|
153
|
+
class DocumentUploader < CarrierWave::Uploader::Base
|
|
154
|
+
def store_dir
|
|
155
|
+
"uploads/#{model.class.to_s.underscore}/#{mounted_as}/#{model.id}"
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Policy notes:
|
|
161
|
+
|
|
162
|
+
- `primary` is where new uploads are stored.
|
|
163
|
+
- `archive` rules are checked in order; the last eligible rule wins.
|
|
164
|
+
- `scope` narrows the model relation before records are scanned. It can be a model scope name, a relation, or a callable that receives the current relation.
|
|
165
|
+
- `read_fallbacks` is the read-recovery order when registry metadata is missing or a configured fallback error is raised.
|
|
166
|
+
- By default only the original CarrierWave file is planned. Use `include_versions true` or `versions ...` when thumbnails/previews must move too.
|
|
167
|
+
|
|
168
|
+
## Scheduled Jobs
|
|
169
|
+
|
|
170
|
+
Schedules are declared in global configuration:
|
|
171
|
+
|
|
172
|
+
```ruby
|
|
173
|
+
ArchiveStorage.configure do |config|
|
|
174
|
+
config.schedule :archive_documents,
|
|
175
|
+
cron: "0 0-6,22,23 * * 1-5",
|
|
176
|
+
model: "ProjectDocument",
|
|
177
|
+
mounted_as: :file,
|
|
178
|
+
migration_rate: 10_000
|
|
179
|
+
end
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
`migration_rate` means at most this many files are enqueued by one scheduled run.
|
|
183
|
+
|
|
184
|
+
`archive_storage` registers scheduler entries automatically. You do not need to merge `ArchiveStorage.good_job_cron` or `ArchiveStorage.sidekiq_cron` into your application config.
|
|
185
|
+
|
|
186
|
+
### GoodJob
|
|
187
|
+
|
|
188
|
+
When `good_job` is present, `archive_storage` appends its entries to `config.good_job.cron` after Rails initialization. Existing GoodJob cron entries are preserved.
|
|
189
|
+
|
|
190
|
+
Enable GoodJob cron in the app environment where the scheduler should run:
|
|
191
|
+
|
|
192
|
+
```ruby
|
|
193
|
+
# config/environments/production.rb
|
|
194
|
+
|
|
195
|
+
Rails.application.configure do
|
|
196
|
+
config.good_job.enable_cron = true
|
|
197
|
+
end
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Sidekiq
|
|
201
|
+
|
|
202
|
+
Use Sidekiq for migration jobs:
|
|
203
|
+
|
|
204
|
+
```ruby
|
|
205
|
+
# config/initializers/archive_storage.rb
|
|
206
|
+
|
|
207
|
+
ArchiveStorage.configure do |config|
|
|
208
|
+
config.job_backend = :sidekiq
|
|
209
|
+
end
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
Add one scheduler gem:
|
|
213
|
+
|
|
214
|
+
```ruby
|
|
215
|
+
gem "sidekiq-cron"
|
|
216
|
+
# or
|
|
217
|
+
gem "sidekiq-scheduler"
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
On Sidekiq server startup, `archive_storage` adds its own schedules without deleting existing jobs:
|
|
221
|
+
|
|
222
|
+
- with `sidekiq-cron`, it uses non-destructive `Sidekiq::Cron::Job.load_from_hash`
|
|
223
|
+
- with `sidekiq-scheduler`, it uses `Sidekiq.set_schedule` and reloads the scheduler
|
|
224
|
+
|
|
225
|
+
Existing jobs from `sidekiq.yml`, `config/schedule.yml`, or custom initializers remain in place.
|
|
226
|
+
|
|
227
|
+
## Commands
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
bin/rails archive_storage:plan MODEL=ProjectDocument MOUNT=file
|
|
231
|
+
bin/rails archive_storage:enqueue MODEL=ProjectDocument MOUNT=file
|
|
232
|
+
bin/rails archive_storage:migrate MODEL=ProjectDocument MOUNT=file
|
|
233
|
+
bin/rails archive_storage:verify
|
|
234
|
+
bin/rails archive_storage:cleanup_source
|
|
235
|
+
bin/rails archive_storage:status
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
Options:
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
MODEL=ProjectDocument
|
|
242
|
+
MOUNT=file
|
|
243
|
+
OLDER_THAN=90d
|
|
244
|
+
LIMIT=10000
|
|
245
|
+
INLINE=true
|
|
246
|
+
ESTIMATE_SIZES=false
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
`UPLOADER=DocumentUploader` is still accepted for advanced/legacy uploader-level configurations.
|
|
250
|
+
|
|
251
|
+
Command behavior:
|
|
252
|
+
|
|
253
|
+
- `plan` prints a dry-run plan.
|
|
254
|
+
- `enqueue` and `migrate` enqueue migration jobs by default.
|
|
255
|
+
- `migrate INLINE=true` runs migration inline.
|
|
256
|
+
- `verify` re-checks already migrated files.
|
|
257
|
+
- `cleanup_source` deletes verified source copies that are past the cleanup delay.
|
|
258
|
+
- `status` prints registry counters.
|
|
259
|
+
|
|
260
|
+
## Migration Flow
|
|
261
|
+
|
|
262
|
+
```text
|
|
263
|
+
source only
|
|
264
|
+
source + destination copied
|
|
265
|
+
destination verified
|
|
266
|
+
registry points reads to destination
|
|
267
|
+
reads can fall back to source
|
|
268
|
+
source deleted later when cleanup is enabled
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
Source deletion is disabled by default:
|
|
272
|
+
|
|
273
|
+
```ruby
|
|
274
|
+
config.delete_source_enabled = false
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
Turn it on only after the migration path has been verified in production:
|
|
278
|
+
|
|
279
|
+
```ruby
|
|
280
|
+
config.delete_source_enabled = true
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
Per-mount cleanup delay:
|
|
284
|
+
|
|
285
|
+
```ruby
|
|
286
|
+
archive_storage_for :file do
|
|
287
|
+
delete_source_after verification: true, delay: 7.days
|
|
288
|
+
end
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
## Verification
|
|
292
|
+
|
|
293
|
+
The default strategy is `:auto`.
|
|
294
|
+
|
|
295
|
+
`archive_storage` does not blindly trust S3 ETags. Multipart S3 uploads can have ETags like `hash-3`, and uploading the same bytes to another storage can produce a different ETag.
|
|
296
|
+
|
|
297
|
+
Strategies:
|
|
298
|
+
|
|
299
|
+
- `:auto` - size check, then checksum when available, then non-multipart ETag, otherwise size-only
|
|
300
|
+
- `:checksum` - require matching checksums
|
|
301
|
+
- `:safe_etag` - require matching non-multipart ETags
|
|
302
|
+
- `:etag` - require matching ETags, including multipart-looking values
|
|
303
|
+
- `:byte_compare` - compare full file bytes after size check
|
|
304
|
+
- `:size` - compare content length only
|
|
305
|
+
|
|
306
|
+
```ruby
|
|
307
|
+
ArchiveStorage.configure do |config|
|
|
308
|
+
config.verification_strategy = :auto
|
|
309
|
+
end
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## Registry
|
|
313
|
+
|
|
314
|
+
The generated migration creates `archive_storage_files`.
|
|
315
|
+
|
|
316
|
+
The registry stores:
|
|
317
|
+
|
|
318
|
+
- model identity: `record_type`, `record_id`, `mounted_as`, `uploader`
|
|
319
|
+
- object identity: `identifier`, `storage_key`, source/target keys
|
|
320
|
+
- storage state: `current_storage`, `source_storage`, `target_storage`
|
|
321
|
+
- migration state: enqueue, migration, verification, cleanup timestamps
|
|
322
|
+
- metadata: byte size, checksum, content type, attempts, last error
|
|
323
|
+
|
|
324
|
+
Business tables do not need extra columns for archive location.
|
|
325
|
+
|
|
326
|
+
## CarrierWave Versions
|
|
327
|
+
|
|
328
|
+
CarrierWave versions are disabled by default.
|
|
329
|
+
|
|
330
|
+
```ruby
|
|
331
|
+
archive_storage_for :file do
|
|
332
|
+
include_versions true
|
|
333
|
+
end
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
To migrate only selected versions:
|
|
337
|
+
|
|
338
|
+
```ruby
|
|
339
|
+
archive_storage_for :file do
|
|
340
|
+
versions :thumb, :preview
|
|
341
|
+
end
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
Use this only when those files are stored and read as part of the same archival policy. It can multiply the number of objects planned for migration.
|
|
345
|
+
|
|
346
|
+
## Current Scope
|
|
347
|
+
|
|
348
|
+
This MVP is focused on Rails, ActiveRecord, and CarrierWave. The storage and registry layers are not CarrierWave-specific, so other uploader integrations can be added later.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lib/archive_storage/version"
|
|
4
|
+
|
|
5
|
+
# Gem specification for archive_storage: identity, packaged files, and the
# supported dependency ranges.
Gem::Specification.new do |s|
  project_url = "https://github.com/estashkovyan/archive_storage"
  rails_requirement = [">= 6.1", "< 9.0"]
  packaged_patterns = [
    "lib/**/*",
    "LICENSE.txt",
    "README.md",
    "archive_storage.gemspec"
  ]

  # Identity
  s.name = "archive_storage"
  s.version = ArchiveStorage::VERSION
  s.authors = ["E. Tashkovyan"]
  s.email = []

  # Description and licensing
  s.summary = "Policy-based archive storage and zero-downtime file migration."
  s.description = "Move uploads across storage backends such as filesystem, NFS, MinIO, and S3 without downtime."
  s.homepage = project_url
  s.license = "MIT"
  s.required_ruby_version = ">= 3.1.0"

  # Registry metadata; the URIs are all derived from the homepage.
  s.metadata["allowed_push_host"] = "https://rubygems.org"
  s.metadata["homepage_uri"] = project_url
  s.metadata["source_code_uri"] = "#{project_url}/tree/main"
  s.metadata["changelog_uri"] = "#{project_url}/releases"

  # Globs are resolved relative to the gemspec's own directory so the file
  # list does not depend on the caller's working directory.
  s.files = Dir.chdir(__dir__) { Dir.glob(packaged_patterns) }
  s.bindir = "exe"
  s.require_paths = ["lib"]

  # Runtime dependencies: the Rails components share a single version range.
  %w[activejob activerecord activesupport railties].each do |component|
    s.add_dependency component, *rail_requirement = rails_requirement
  end

  # Development-only dependencies (the S3 SDK and CarrierWave are optional at runtime).
  {
    "aws-sdk-s3" => ["~> 1"],
    "carrierwave" => [">= 2.2", "< 4.0"],
    "minitest" => ["~> 5.0"],
    "rake" => ["~> 13.0"]
  }.each do |dependency, requirement|
    s.add_development_dependency dependency, *requirement
  end
end
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
require_relative "../errors"
|
|
6
|
+
require_relative "metadata"
|
|
7
|
+
|
|
8
|
+
module ArchiveStorage
  module Adapters
    # Storage adapter that keeps every object as a plain file underneath
    # +config.root_path+.
    #
    # Content type and custom metadata live in a JSON sidecar file stored next
    # to the object ("<object path>.archive_storage.json"). The sidecar only
    # exists while there is something to record.
    class FileSystem
      attr_reader :config

      # @param config [#root_path, #base_url, #name] settings of this storage
      def initialize(config)
        @config = config
      end

      # Stores an uploaded file object under +key+.
      #
      # @param key [#to_s] storage key, relative to the root path
      # @param file [#path, #read, #to_s] upload source (see #read_upload_body)
      # @param content_type [String, nil] explicit content type; falls back to
      #   +file.content_type+ when the file responds to it
      # @return [true]
      def upload(key, file, content_type: nil)
        write(key, read_upload_body(file), content_type: content_type || detect_content_type(file))
      end

      # Writes +body+ to the file for +key+, creating parent directories, and
      # refreshes the metadata sidecar.
      #
      # @param key [#to_s] storage key
      # @param body [#to_s] content, written as binary
      # @param content_type [String, nil]
      # @param metadata [Hash, nil] custom metadata; nil is treated as empty
      # @return [true]
      # @raise [ConfigurationError] when the key escapes the root or no root is set
      def write(key, body, content_type: nil, metadata: {})
        path = path_for(key)
        ::FileUtils.mkdir_p(::File.dirname(path))
        ::File.binwrite(path, body.to_s.b)
        write_metadata(key, content_type: content_type, metadata: metadata)
        true
      end

      # @param key [#to_s] storage key
      # @return [String] binary content of the object
      # @raise [NotFoundError] when no file exists for +key+
      def read(key)
        raise_not_found(key) unless exists?(key)

        ::File.binread(path_for(key))
      end

      # Copies the object to a local +path+, creating its parent directory.
      #
      # @raise [NotFoundError] when no file exists for +key+
      def download_to(key, path)
        raise_not_found(key) unless exists?(key)

        ::FileUtils.mkdir_p(::File.dirname(path))
        ::FileUtils.cp(path_for(key), path)
      end

      # Copies an object from another adapter into this storage, carrying over
      # the content type and metadata reported by the source's +head+.
      #
      # @param source_adapter [#head, #read] adapter that owns the source object
      # @return [true]
      def copy_from(source_adapter, source_key, target_key)
        metadata = source_adapter.head(source_key)
        write(target_key, source_adapter.read(source_key), content_type: metadata.content_type, metadata: metadata.metadata || {})
      end

      # Describes the stored object without loading it into memory: the size
      # comes from the filesystem and the MD5 digest is computed by streaming
      # the file.
      #
      # @return [Metadata] with +etag+ always nil for this adapter
      # @raise [NotFoundError] when no file exists for +key+
      def head(key)
        raise_not_found(key) unless exists?(key)

        path = path_for(key)
        stored_metadata = read_metadata(key)

        Metadata.new(
          byte_size: ::File.size(path),
          content_type: stored_metadata[:content_type],
          etag: nil,
          checksum: Digest::MD5.file(path).hexdigest,
          checksum_algorithm: "md5",
          metadata: stored_metadata[:metadata] || {}
        )
      end

      # @return [Boolean] true when a regular file exists for +key+
      def exists?(key)
        ::File.file?(path_for(key))
      end

      # Removes the object and its metadata sidecar. Missing files are ignored.
      #
      # @return [true]
      def delete(key)
        ::FileUtils.rm_f(path_for(key))
        ::FileUtils.rm_f(metadata_path_for(key))
        true
      end

      # @return [String] "<base_url>/<key>" when +config.base_url+ is set,
      #   otherwise the absolute filesystem path of the object
      # @raise [NotFoundError] when no file exists for +key+
      def url(key, **_options)
        raise_not_found(key) unless exists?(key)

        if config.base_url
          "#{config.base_url.to_s.delete_suffix("/")}/#{key}"
        else
          path_for(key)
        end
      end

      private

      # Resolves +key+ to an absolute path and rejects keys (for example ones
      # containing "..") that would resolve outside the configured root.
      def path_for(key)
        root = config.root_path || raise(ConfigurationError, "filesystem storage #{config.name.inspect} requires root_path")
        expanded_root = ::File.expand_path(root)
        expanded_path = ::File.expand_path(::File.join(expanded_root, key.to_s))

        unless expanded_path == expanded_root || expanded_path.start_with?("#{expanded_root}#{::File::SEPARATOR}")
          raise ConfigurationError, "storage key escapes filesystem root: #{key.inspect}"
        end

        expanded_path
      end

      # Path of the JSON sidecar that holds content type and metadata.
      def metadata_path_for(key)
        "#{path_for(key)}.archive_storage.json"
      end

      # Reads the upload body, preferring an on-disk path, then +read+, and
      # finally the object's string form.
      def read_upload_body(file)
        return ::File.binread(file.path) if file.respond_to?(:path) && file.path
        return file.read if file.respond_to?(:read)

        file.to_s
      end

      def detect_content_type(file)
        file.content_type if file.respond_to?(:content_type)
      end

      # Persists the sidecar for +key+. When there is nothing to record, any
      # sidecar left by an earlier write is removed so that overwriting an
      # object never exposes the previous object's content type or metadata.
      def write_metadata(key, content_type:, metadata:)
        metadata ||= {}
        sidecar = metadata_path_for(key)

        if content_type.nil? && metadata.empty?
          ::FileUtils.rm_f(sidecar)
          return
        end

        require "json"
        ::File.binwrite(sidecar, JSON.dump({ content_type: content_type, metadata: metadata }))
      end

      # Loads the sidecar for +key+ with symbolized keys. Returns an empty
      # hash when the sidecar is missing, unparsable, or not a JSON object.
      def read_metadata(key)
        # Loaded first so the rescue clause below can always resolve
        # JSON::ParserError, even when an earlier statement raises.
        require "json"

        sidecar = metadata_path_for(key)
        return {} unless ::File.file?(sidecar)

        parsed = JSON.parse(::File.binread(sidecar), symbolize_names: true)
        parsed.is_a?(Hash) ? parsed : {}
      rescue JSON::ParserError
        {}
      end

      def raise_not_found(key)
        raise NotFoundError, "object #{key.inspect} not found"
      end
    end
  end
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
require_relative "../errors"
|
|
5
|
+
require_relative "metadata"
|
|
6
|
+
|
|
7
|
+
module ArchiveStorage
  module Adapters
    # In-process storage adapter that keeps objects in a Hash.
    #
    # Keys are normalized to strings so that symbol and string keys address
    # the same object, in line with the filesystem adapter, which converts
    # keys with +to_s+ when building paths.
    class Memory
      attr_reader :config

      # @param config [#name] settings of this storage
      def initialize(config)
        @config = config
        @objects = {}
      end

      # Stores an uploaded file object under +key+.
      #
      # @param key [#to_s] storage key
      # @param file [#path, #read, #to_s] upload source (see #read_upload_body)
      # @param content_type [String, nil] explicit content type; falls back to
      #   +file.content_type+ when the file responds to it
      # @return [Hash] the stored entry (see #write)
      def upload(key, file, content_type: nil)
        body = read_upload_body(file)
        write(
          key,
          body,
          content_type: content_type || detect_content_type(file)
        )
      end

      # Stores +body+ (as a binary string) together with its content type and
      # metadata, replacing any previous object under the same key.
      #
      # @return [Hash] the stored entry with :body, :content_type and :metadata
      def write(key, body, content_type: nil, metadata: {})
        @objects[normalize_key(key)] = {
          body: body.to_s.b,
          content_type: content_type,
          metadata: metadata
        }
      end

      # @return [String] binary content of the object
      # @raise [NotFoundError] when no object is stored under +key+
      def read(key)
        object_for(key).fetch(:body)
      end

      # Writes the object's content to the local file at +path+.
      #
      # @raise [NotFoundError] when no object is stored under +key+
      def download_to(key, path)
        ::File.binwrite(path, read(key))
      end

      # Copies an object from another adapter into this storage, carrying over
      # the content type and metadata reported by the source's +head+.
      #
      # @param source_adapter [#head, #read] adapter that owns the source object
      def copy_from(source_adapter, source_key, target_key)
        source_metadata = source_adapter.head(source_key)
        write(
          target_key,
          source_adapter.read(source_key),
          content_type: source_metadata.content_type,
          metadata: source_metadata.metadata || {}
        )
      end

      # Describes the stored object. The MD5 hex digest of the body is used
      # for both +etag+ and +checksum+, so it is computed only once.
      #
      # @return [Metadata]
      # @raise [NotFoundError] when no object is stored under +key+
      def head(key)
        object = object_for(key)
        body = object.fetch(:body)
        digest = Digest::MD5.hexdigest(body)

        Metadata.new(
          byte_size: body.bytesize,
          content_type: object[:content_type],
          etag: digest,
          checksum: digest,
          checksum_algorithm: "md5",
          metadata: object[:metadata] || {}
        )
      end

      # @return [Boolean] true when an object is stored under +key+
      def exists?(key)
        @objects.key?(normalize_key(key))
      end

      # Removes the object; deleting a missing key is not an error.
      #
      # @return [true]
      def delete(key)
        @objects.delete(normalize_key(key))
        true
      end

      # @return [String] a "memory://<storage name>/<key>" pseudo-URL
      # @raise [NotFoundError] when no object is stored under +key+
      def url(key, **_options)
        raise NotFoundError, "object #{key.inspect} not found" unless exists?(key)

        "memory://#{config.name}/#{key}"
      end

      private

      # Canonical Hash key for a storage key.
      def normalize_key(key)
        key.to_s
      end

      def object_for(key)
        @objects.fetch(normalize_key(key)) do
          raise NotFoundError, "object #{key.inspect} not found"
        end
      end

      # Reads the upload body, preferring an on-disk path, then +read+, and
      # finally the object's string form.
      def read_upload_body(file)
        return ::File.binread(file.path) if file.respond_to?(:path) && file.path
        return file.read if file.respond_to?(:read)

        file.to_s
      end

      def detect_content_type(file)
        file.content_type if file.respond_to?(:content_type)
      end
    end
  end
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ArchiveStorage
  module Adapters
    # Value object describing a stored object, as returned by an adapter's
    # +head+ call.
    #
    # Members: +byte_size+, +content_type+, +etag+, +checksum+,
    # +checksum_algorithm+ and +metadata+ (custom key/value pairs).
    # Instances are built with keyword arguments.
    Metadata = Struct.new(
      :byte_size,
      :content_type,
      :etag,
      :checksum,
      :checksum_algorithm,
      :metadata,
      keyword_init: true
    ) do
      # @return [Boolean] true when the ETag contains a "-", the shape of
      #   multipart-upload ETags such as "hash-3"
      def multipart_etag?
        etag.to_s.include?("-")
      end

      # @return [Boolean] true only when a non-empty, non-multipart ETag is
      #   present. A missing or blank ETag is never considered safe, and the
      #   result is always a strict boolean.
      def safe_etag?
        !etag.to_s.empty? && !multipart_etag?
      end
    end
  end
end
|