s3_direct_multipart_upload 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. checksums.yaml +7 -0
  2. data/AGENTS.md +45 -0
  3. data/CODE_OF_CONDUCT.md +22 -0
  4. data/CONTRIBUTING.md +26 -0
  5. data/MIT-LICENSE +21 -0
  6. data/OPERATIONS.md +55 -0
  7. data/README.md +93 -0
  8. data/SPEC.md +62 -0
  9. data/app/controllers/s3_direct_multipart_upload/application_controller.rb +4 -0
  10. data/app/controllers/s3_direct_multipart_upload/dev/storage_controller.rb +62 -0
  11. data/app/controllers/s3_direct_multipart_upload/dev/storage_downloads_controller.rb +44 -0
  12. data/app/controllers/s3_direct_multipart_upload/upload_completions_controller.rb +32 -0
  13. data/app/controllers/s3_direct_multipart_upload/upload_parts_controller.rb +29 -0
  14. data/app/controllers/s3_direct_multipart_upload/upload_sessions_controller.rb +46 -0
  15. data/app/models/s3_direct_multipart_upload/upload_session.rb +394 -0
  16. data/app/models/s3_direct_multipart_upload/upload_session_part.rb +34 -0
  17. data/app/services/s3_direct_multipart_upload/dev/storage.rb +85 -0
  18. data/config/initializers/s3_direct_dev.rb +13 -0
  19. data/config/routes.rb +15 -0
  20. data/config/storage/development.yml +3 -0
  21. data/config/storage/test.yml +3 -0
  22. data/db/migrate/20251113090000_create_s3_direct_multipart_upload_tables.rb +32 -0
  23. data/doc/api.yml +256 -0
  24. data/doc/api_dev.yml +111 -0
  25. data/lib/generators/s3_direct_multipart_upload/install/install_generator.rb +14 -0
  26. data/lib/s3_direct_multipart_upload/engine.rb +16 -0
  27. data/lib/s3_direct_multipart_upload/version.rb +3 -0
  28. data/lib/s3_direct_multipart_upload.rb +9 -0
  29. data/lib/tasks/s3_direct_multipart_upload_tasks.rake +15 -0
  30. metadata +247 -0
data/app/models/s3_direct_multipart_upload/upload_session.rb ADDED
@@ -0,0 +1,394 @@
+ # == Schema Information
+ #
+ # Table name: s3_direct_multipart_upload_sessions
+ #
+ # id :bigint not null, primary key
+ # bucket :string(255) not null
+ # byte_size :bigint not null
+ # chunk_size :integer
+ # content_type :string(255)
+ # expires_at :datetime indexed
+ # filename :string(255) not null
+ # key_prefix :string(255) not null
+ # metadata :json not null
+ # status :string(255) default("pending"), not null
+ # created_at :datetime not null
+ # updated_at :datetime not null
+ # session_id :string(255) not null, uniquely indexed
+ # upload_id :string(255) not null, uniquely indexed
+ #
+ require "digest"
+ require "erb"
+ require "uri"
+ require "yaml"
+
+ module S3DirectMultipartUpload
+   class UploadSession < ::ApplicationRecord
+     self.table_name = "s3_direct_multipart_upload_sessions"
+
+     MAX_PARTS = 10_000
+     MIN_MULTIPART_CHUNK_SIZE = 5.megabytes
+     MAX_CHUNK_SIZE = 5.gigabytes
+     SINGLE_PART_THRESHOLD = MIN_MULTIPART_CHUNK_SIZE
+     PRESIGNED_URL_TTL = 15.minutes
+
+     has_many :parts,
+              class_name: "S3DirectMultipartUpload::UploadSessionPart",
+              inverse_of: :upload_session,
+              dependent: :destroy
+
+     enum :status, {
+       pending: "pending",
+       uploading: "uploading",
+       completed: "completed",
+       aborted: "aborted"
+     }
+
+     validates :session_id, :upload_id, :bucket, :key_prefix, :status, presence: true
+     validates :session_id, :upload_id, uniqueness: true
+     validates :byte_size, numericality: { greater_than: 0 }
+     validates :chunk_size, numericality: { greater_than: 0 }, allow_nil: true
+     validate :validate_chunk_size_bounds, :validate_expected_parts
+
+     before_validation :assign_defaults, on: :create
+
+     def self.start!(filename:, byte_size:, content_type:, chunk_size: nil, metadata: {})
+       byte_size = byte_size.to_i
+       raise ArgumentError, "byte_size must be positive" if byte_size <= 0
+
+       resolved_chunk_size = resolve_chunk_size(byte_size:, chunk_size:)
+       expected_parts = expected_parts_for(byte_size:, chunk_size: resolved_chunk_size)
+       raise ArgumentError, "part number limit exceeded" if expected_parts > MAX_PARTS
+
+       session_id = SecureRandom.uuid
+       key_prefix = default_key_prefix(session_id)
+       object_key = build_object_key(key_prefix:, filename:)
+
+       response = s3_client.create_multipart_upload(
+         bucket: default_bucket,
+         key: object_key,
+         content_type:
+       )
+
+       create!(
+         session_id:,
+         upload_id: response.upload_id,
+         bucket: response.bucket || default_bucket,
+         key_prefix:,
+         filename:,
+         byte_size:,
+         content_type:,
+         chunk_size: resolved_chunk_size,
+         metadata: metadata
+       ).tap(&:schedule_expiration!)
+     end
+
+     def presigned_parts(part_numbers:)
+       raise ArgumentError, "part number limit exceeded" if part_numbers.size > MAX_PARTS
+
+       part_numbers.map do |number|
+         disk_service? ? dev_presigned_part(number) : aws_presigned_part(number)
+       end
+     end
+
+     def report_part!(part_number:, etag:)
+       part_number = part_number.to_i
+       raise ArgumentError, "part_number is invalid" if part_number.zero? || part_number.negative?
+       raise ArgumentError, "part_number exceeds expected parts" if expected_parts.positive? && part_number > expected_parts
+       raise ArgumentError, "upload already completed or aborted" if completed? || aborted?
+
+       transaction do
+         record = parts.find_or_initialize_by(part_number:)
+         record.update!(etag:, uploaded_at: Time.current)
+         uploading! if pending?
+       end
+     end
+
+     def complete!(checksum: nil)
+       missing = missing_parts
+       raise "missing parts: #{missing}" if missing.present?
+
+       self.class.s3_client.complete_multipart_upload(
+         bucket:,
+         key: object_key,
+         upload_id:,
+         multipart_upload: {
+           parts: parts.order(:part_number).map { { part_number: _1.part_number, etag: _1.etag } }
+         }
+       )
+       completed!
+       update!(metadata: metadata.merge("checksum" => checksum).compact)
+       combine_dev_parts_if_needed
+       self
+     end
+
+     def object_key
+       @object_key ||= self.class.build_object_key(key_prefix:, filename:)
+     end
+
+     def parts_uploaded?
+       expected_parts.positive? && missing_parts.empty?
+     end
+
+     def expected_parts
+       return 0 if chunk_size.blank? || byte_size.blank?
+
+       self.class.expected_parts_for(byte_size:, chunk_size:)
+     end
+
+     def part_limit
+       MAX_PARTS
+     end
+
+     def missing_parts
+       return [] unless expected_parts.positive?
+
+       uploaded = parts.pluck(:part_number).uniq.sort
+       expected = (1..expected_parts).to_a
+       expected - uploaded
+     end
+
+     def schedule_expiration!
+       update!(expires_at: 24.hours.from_now)
+     end
+
+     def default_chunk_size
+       self.class.default_chunk_size
+     end
+
+     private
+
+     def assign_defaults
+       self.session_id ||= SecureRandom.uuid
+       self.status ||= :pending
+       self.bucket ||= self.class.default_bucket
+       self.chunk_size ||= self.class.default_chunk_size
+       self.metadata ||= {}
+       self.key_prefix ||= self.class.default_key_prefix(session_id)
+     end
+
+     def presigner
+       @presigner ||= Aws::S3::Presigner.new(client: self.class.s3_client)
+     end
+
+     def disk_service?
+       self.class.disk_service?
+     end
+
+     def aws_presigned_part(number)
+       {
+         part_number: number,
+         url: presigner.presigned_url(
+           :upload_part,
+           bucket:,
+           key: object_key,
+           upload_id:,
+           part_number: number,
+           expires_in: PRESIGNED_URL_TTL.to_i
+         ),
+         expires_in: PRESIGNED_URL_TTL.to_i
+       }
+     end
+
+     def dev_presigned_part(number)
+       {
+         part_number: number,
+         url: self.class.dev_presigned_url(object_key:, upload_id:, part_number: number),
+         expires_in: PRESIGNED_URL_TTL.to_i
+       }
+     end
+
+     def validate_chunk_size_bounds
+       return if chunk_size.blank? || byte_size.blank?
+
+       if single_part?
+         errors.add(:chunk_size, "must equal byte_size for single part") unless chunk_size == byte_size
+         return
+       end
+
+       errors.add(:chunk_size, "must be at least #{MIN_MULTIPART_CHUNK_SIZE}") if chunk_size < MIN_MULTIPART_CHUNK_SIZE
+       errors.add(:chunk_size, "must be at most #{MAX_CHUNK_SIZE}") if chunk_size > MAX_CHUNK_SIZE
+     end
+
+     def validate_expected_parts
+       return if chunk_size.blank? || byte_size.blank?
+
+       errors.add(:base, "part number limit exceeded") if expected_parts > MAX_PARTS
+     end
+
+     def single_part?
+       byte_size.present? && byte_size < SINGLE_PART_THRESHOLD
+     end
+
+     def combine_dev_parts_if_needed
+       return unless self.class.dev_mode_enabled?
+       return unless self.class.disk_service?
+
+       S3DirectMultipartUpload::Dev::Storage.combine_parts(self)
+     rescue StandardError => e
+       Rails.logger.warn("[S3DirectMultipartUpload] failed to combine parts in dev mode: #{e.message}")
+     end
+
+     class << self
+       def default_bucket
+         storage_config.fetch(:bucket)
+       end
+
+       def default_key_prefix(session_id)
+         date_prefix = Time.current.strftime("%Y%m%d")
+         File.join(upload_namespace, date_prefix, session_id)
+       end
+
+       def upload_namespace
+         "uploads/s3_direct"
+       end
+
+       def default_chunk_size
+         128.megabytes
+       end
+
+       def resolve_chunk_size(byte_size:, chunk_size:)
+         return byte_size if byte_size < SINGLE_PART_THRESHOLD
+
+         requested = chunk_size.presence&.to_i || default_chunk_size
+         raise ArgumentError, "chunk_size must be positive" if requested <= 0
+         raise ArgumentError, "chunk_size must be at least #{MIN_MULTIPART_CHUNK_SIZE}" if requested < MIN_MULTIPART_CHUNK_SIZE
+         raise ArgumentError, "chunk_size must be at most #{MAX_CHUNK_SIZE}" if requested > MAX_CHUNK_SIZE
+
+         requested
+       end
+
+       def expected_parts_for(byte_size:, chunk_size:)
+         (byte_size.to_f / chunk_size).ceil
+       end
+
+       def s3_client
+         @s3_client ||= begin
+           config = storage_config
+           client_options = {
+             region: config.fetch(:region),
+             endpoint: config[:endpoint],
+             force_path_style: config[:force_path_style],
+             stub_responses: stub_responses_option(config)
+           }.compact
+           Aws::S3::Client.new(**client_options)
+         end
+       end
+
+       def build_object_key(key_prefix:, filename:)
+         File.join(key_prefix, filename)
+       end
+
+       def storage_config
+         @storage_config ||= begin
+           config = storage_configurations["s3_direct_multipart"] || storage_configurations[:s3_direct_multipart]
+           raise configuration_error("ActiveStorage service `s3_direct_multipart` is not configured") unless config
+
+           config = normalize_config(config.deep_symbolize_keys)
+           validate_config!(config)
+           config
+         end
+       end
+
+       def storage_configurations
+         explicit = Rails.application.config.respond_to?(:active_storage) &&
+                    Rails.application.config.active_storage.respond_to?(:service_configurations) &&
+                    Rails.application.config.active_storage.service_configurations.present?
+         return Rails.application.config.active_storage.service_configurations if explicit
+
+         path = storage_config_path
+         raise configuration_error("ActiveStorage service configurations are empty; add config/storage.yml or config/storage/<env>.yml") unless path
+
+         erb = ERB.new(path.read).result
+         YAML.safe_load(erb, aliases: true) || {}
+       end
+
+       def storage_config_path
+         env_path = Rails.root.join("config/storage/#{Rails.env}.yml")
+         return env_path if env_path.exist?
+
+         default_path = Rails.root.join("config/storage.yml")
+         default_path if default_path.exist?
+       end
+
+       def validate_config!(config)
+         service = config[:service].to_s.downcase
+         raise configuration_error("ActiveStorage service `s3_direct_multipart` must set service: S3 or Disk") unless %w[s3 disk].include?(service)
+         raise configuration_error("ActiveStorage service `s3_direct_multipart` requires bucket") if config[:bucket].blank?
+         raise configuration_error("ActiveStorage service `s3_direct_multipart` requires region") if config[:region].blank?
+       end
+
+       def normalize_config(config)
+         service = config[:service].to_s.downcase
+         return config if service == "s3"
+
+         config.merge(
+           service: "Disk",
+           bucket: config[:bucket] || "s3_direct_disk",
+           region: config[:region] || "us-east-1",
+           stub_responses: true
+         )
+       end
+
+       def disk_service?
+         storage_config[:service].to_s.casecmp("disk").zero?
+       end
+
+       def dev_presigned_url(object_key:, upload_id:, part_number:)
+         raise configuration_error("dev presigned URLs are only available for Disk service") unless disk_service?
+
+         expires_at = PRESIGNED_URL_TTL.from_now
+         signature = dev_signature(
+           path: object_key,
+           expires_at:,
+           upload_id:,
+           part_number:
+         )
+
+         path = S3DirectMultipartUpload::Engine.routes.url_helpers.dev_storage_upload_path(path: object_key)
+         url = URI.join(dev_endpoint_base_url, path)
+         query = Rack::Utils.build_query(
+           uploadId: upload_id,
+           partNumber: part_number,
+           expires_at: expires_at.to_i,
+           signature:
+         )
+         url.query = query
+         url.to_s
+       end
+
+       def dev_signature(path:, expires_at:, upload_id:, part_number:)
+         secret = ENV.fetch("S3_DIRECT_DEV_SECRET", "dev-secret")
+         normalized_path = path.to_s.sub(/\A\//, "")
+         normalized_part = part_number.presence&.to_s
+         Digest::SHA256.hexdigest([ normalized_path, expires_at.to_i, upload_id.to_s, normalized_part, secret ].compact.join(":"))
+       end
+
+       def dev_endpoint_base_url
+         ENV.fetch("S3_DIRECT_DEV_ENDPOINT", "http://localhost:3000")
+       end
+
+       def stub_responses_option(config)
+         return config[:stub_responses] unless disk_service? && config[:stub_responses]
+         return config[:stub_responses] if config[:stub_responses].respond_to?(:to_hash)
+
+         {
+           create_multipart_upload: ->(*) {
+             {
+               upload_id: SecureRandom.uuid,
+               bucket: config[:bucket] || default_bucket
+             }
+           }
+         }
+       end
+
+       def dev_mode_enabled?
+         Rails.env.development? || Rails.env.test?
+       end
+
+       def configuration_error(message)
+         RuntimeError.new(message)
+       end
+     end
+   end
+ end
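Taken together, UploadSession is the whole server-side flow: start! registers the multipart upload with S3 and persists a session row, presigned_parts hands out per-part upload URLs, report_part! records each returned ETag, and complete! finalizes the object. A minimal usage sketch, assuming the migration has been run and the `s3_direct_multipart` service is configured; `put_part` and `chunk_for` stand in for the client-side PUT of each chunk and are hypothetical helpers, not part of the gem:

  session = S3DirectMultipartUpload::UploadSession.start!(
    filename: "video.mp4",
    byte_size: 512.megabytes,   # 4 parts at the 128 MB default chunk_size
    content_type: "video/mp4"
  )

  session.presigned_parts(part_numbers: (1..session.expected_parts).to_a).each do |part|
    # Hypothetical helpers: PUT the chunk bytes to part[:url] and capture the
    # ETag returned in the response headers.
    etag = put_part(part[:url], chunk_for(part[:part_number]))
    session.report_part!(part_number: part[:part_number], etag: etag)
  end

  session.complete! if session.parts_uploaded?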
data/app/models/s3_direct_multipart_upload/upload_session_part.rb ADDED
@@ -0,0 +1,34 @@
+ # == Schema Information
+ #
+ # Table name: s3_direct_multipart_upload_session_parts
+ #
+ # id :bigint not null, primary key
+ # etag :string(255) not null
+ # part_number :integer not null, uniquely indexed => [upload_session_id]
+ # uploaded_at :datetime not null
+ # upload_session_id :bigint not null, indexed, uniquely indexed => [part_number]
+ #
+ # Foreign Keys
+ #
+ # fk_rails_... (upload_session_id => s3_direct_multipart_upload_sessions.id)
+ #
+ module S3DirectMultipartUpload
+   class UploadSessionPart < ::ApplicationRecord
+     self.table_name = "s3_direct_multipart_upload_session_parts"
+
+     belongs_to :upload_session,
+                class_name: "S3DirectMultipartUpload::UploadSession",
+                inverse_of: :parts
+
+     validates :part_number, presence: true, numericality: { greater_than: 0 }
+     validates :etag, presence: true
+
+     after_create :touch_upload_session
+
+     private
+
+     def touch_upload_session
+       upload_session.touch
+     end
+   end
+ end
data/app/services/s3_direct_multipart_upload/dev/storage.rb ADDED
@@ -0,0 +1,85 @@
+ require "fileutils"
+ require "stringio"
+
+ module S3DirectMultipartUpload
+   module Dev
+     module Storage
+       module_function
+
+       def root
+         Rails.root.join("tmp/s3_direct_multipart_upload")
+       end
+
+       def combined_path(upload_session)
+         root.join(upload_session.object_key.to_s.sub(/\A\//, ""))
+       end
+
+       def parts_dir(upload_session)
+         combined_path(upload_session).dirname.join("#{combined_path(upload_session).basename}.parts")
+       end
+
+       def combine_parts(upload_session)
+         raise "download is available only in development/test" unless dev_environment?
+         ensure_directories(upload_session)
+
+         if disk_service?(upload_session)
+           combine_disk_parts(upload_session)
+         else
+           download_s3_object(upload_session)
+         end
+       end
+
+       def combined_file_exist?(upload_session)
+         combined_path(upload_session).exist?
+       end
+
+       def dev_environment?
+         Rails.env.development? || Rails.env.test?
+       end
+
+       def disk_service?(upload_session)
+         upload_session.class.send(:disk_service?)
+       end
+
+       def ensure_directories(upload_session)
+         FileUtils.mkdir_p(combined_path(upload_session).dirname)
+         FileUtils.mkdir_p(parts_dir(upload_session))
+       end
+
+       def combine_disk_parts(upload_session)
+         expected = upload_session.expected_parts
+         raise "upload is not completed" unless upload_session.completed?
+
+         if expected <= 1
+           single_part_path = parts_dir(upload_session).join("1")
+           source = single_part_path.exist? ? single_part_path : combined_path(upload_session)
+           raise "part is missing" unless source.exist?
+
+           FileUtils.cp(source, combined_path(upload_session))
+           return combined_path(upload_session)
+         end
+
+         io = StringIO.new
+         (1..expected).each do |part_number|
+           part_path = parts_dir(upload_session).join(part_number.to_s)
+           raise "missing part #{part_number}" unless part_path.exist?
+
+           io.write(File.binread(part_path))
+         end
+
+         File.binwrite(combined_path(upload_session), io.string)
+         combined_path(upload_session)
+       end
+
+       def download_s3_object(upload_session)
+         client = upload_session.class.s3_client
+         client.get_object(
+           bucket: upload_session.bucket,
+           key: upload_session.object_key,
+           response_target: combined_path(upload_session)
+         )
+         combined_path(upload_session)
+       end
+     end
+   end
+ end
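In development and test the Disk-backed parts live under tmp/s3_direct_multipart_upload and are reassembled by combine_parts, which complete! already triggers via combine_dev_parts_if_needed. A small sketch of locating the reassembled file, assuming `session` is a completed, Disk-backed UploadSession:

  storage = S3DirectMultipartUpload::Dev::Storage
  storage.combine_parts(session) unless storage.combined_file_exist?(session)
  combined = storage.combined_path(session)  # Pathname under tmp/s3_direct_multipart_upload
  File.size(combined)                        # should match session.byte_size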
data/config/initializers/s3_direct_dev.rb ADDED
@@ -0,0 +1,13 @@
+ # frozen_string_literal: true
+
+ if Rails.env.development? || Rails.env.test?
+   Aws.config[:s3] ||= {}
+   Aws.config[:s3][:stub_responses] = {
+     create_multipart_upload: ->(*) {
+       {
+         upload_id: SecureRandom.uuid,
+         bucket: "dev-bucket"
+       }
+     }
+   }
+ end
data/config/routes.rb ADDED
@@ -0,0 +1,15 @@
+ S3DirectMultipartUpload::Engine.routes.draw do
+   post "s3_direct_multipart_uploads", to: "upload_sessions#create", as: :s3_direct_multipart_uploads
+   post "s3_direct_multipart_uploads/:upload_session_id/parts", to: "upload_parts#create", as: :s3_direct_multipart_upload_parts
+   post "s3_direct_multipart_uploads/:upload_session_id/complete", to: "upload_completions#create", as: :s3_direct_multipart_upload_complete
+
+   if Rails.env.development? || Rails.env.test?
+     put "s3_direct_multipart_upload/dev/storage/*path",
+         to: "dev/storage#upload",
+         as: :dev_storage_upload,
+         format: false
+     get "s3_direct_multipart_upload/dev/storage/:upload_session_id/download",
+         to: "dev/storage_downloads#show",
+         as: :dev_storage_download
+   end
+ end
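These routes are drawn on the engine's own route set, so they only become reachable once the host application mounts the engine. A hedged example; the mount path is illustrative, not prescribed by the gem:

  # Host application's config/routes.rb (mount path chosen for illustration)
  Rails.application.routes.draw do
    mount S3DirectMultipartUpload::Engine => "/s3_direct_multipart_upload"
  end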
data/config/storage/development.yml ADDED
@@ -0,0 +1,3 @@
+ s3_direct_multipart:
+   service: Disk
+   root: <%= Rails.root.join('tmp/storage') %>
data/config/storage/test.yml ADDED
@@ -0,0 +1,3 @@
+ s3_direct_multipart:
+   service: Disk
+   root: <%= Rails.root.join('tmp/storage') %>
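Both bundled configs point the `s3_direct_multipart` service at Disk, which normalize_config fills out with a stub bucket and region for development and test. For production, validate_config! requires service: S3 plus bucket and region, and s3_client also honors optional endpoint and force_path_style keys; credentials come from the AWS SDK's default provider chain, since the client is built without explicit keys. A hedged sketch with illustrative values:

  # config/storage.yml in the host app (values are illustrative)
  s3_direct_multipart:
    service: S3
    region: us-east-1
    bucket: my-upload-bucket
    # endpoint: https://minio.internal:9000   # optional, for S3-compatible stores
    # force_path_style: true                  # optional, usually paired with a custom endpoint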
data/db/migrate/20251113090000_create_s3_direct_multipart_upload_tables.rb ADDED
@@ -0,0 +1,32 @@
+ class CreateS3DirectMultipartUploadTables < ActiveRecord::Migration[7.1]
+   def change
+     create_table :s3_direct_multipart_upload_sessions do |t|
+       t.string :session_id, null: false
+       t.string :upload_id, null: false
+       t.string :bucket, null: false
+       t.string :key_prefix, null: false
+       t.string :status, null: false, default: 'pending'
+       t.bigint :byte_size, null: false
+       t.string :filename, null: false
+       t.string :content_type
+       t.integer :chunk_size
+       t.datetime :expires_at
+       t.json :metadata, null: false
+
+       t.timestamps
+
+       t.index :session_id, unique: true
+       t.index :upload_id, unique: true
+       t.index :expires_at
+     end
+
+     create_table :s3_direct_multipart_upload_session_parts do |t|
+       t.references :upload_session, null: false, foreign_key: { to_table: :s3_direct_multipart_upload_sessions }
+       t.integer :part_number, null: false
+       t.string :etag, null: false
+       t.datetime :uploaded_at, null: false
+
+       t.index [ :upload_session_id, :part_number ], unique: true, name: 'index_s3_direct_parts_on_session_and_number'
+     end
+   end
+ end