aws-sdk-s3 1.10.0 → 1.208.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +1517 -0
  3. data/LICENSE.txt +202 -0
  4. data/VERSION +1 -0
  5. data/lib/aws-sdk-s3/access_grants_credentials.rb +57 -0
  6. data/lib/aws-sdk-s3/access_grants_credentials_provider.rb +250 -0
  7. data/lib/aws-sdk-s3/bucket.rb +1062 -99
  8. data/lib/aws-sdk-s3/bucket_acl.rb +67 -17
  9. data/lib/aws-sdk-s3/bucket_cors.rb +80 -17
  10. data/lib/aws-sdk-s3/bucket_lifecycle.rb +71 -19
  11. data/lib/aws-sdk-s3/bucket_lifecycle_configuration.rb +126 -20
  12. data/lib/aws-sdk-s3/bucket_logging.rb +68 -18
  13. data/lib/aws-sdk-s3/bucket_notification.rb +56 -20
  14. data/lib/aws-sdk-s3/bucket_policy.rb +108 -17
  15. data/lib/aws-sdk-s3/bucket_region_cache.rb +11 -5
  16. data/lib/aws-sdk-s3/bucket_request_payment.rb +60 -15
  17. data/lib/aws-sdk-s3/bucket_tagging.rb +71 -17
  18. data/lib/aws-sdk-s3/bucket_versioning.rb +166 -17
  19. data/lib/aws-sdk-s3/bucket_website.rb +78 -17
  20. data/lib/aws-sdk-s3/client.rb +20068 -3879
  21. data/lib/aws-sdk-s3/client_api.rb +1957 -209
  22. data/lib/aws-sdk-s3/customizations/bucket.rb +57 -38
  23. data/lib/aws-sdk-s3/customizations/errors.rb +40 -0
  24. data/lib/aws-sdk-s3/customizations/multipart_upload.rb +2 -0
  25. data/lib/aws-sdk-s3/customizations/object.rb +338 -68
  26. data/lib/aws-sdk-s3/customizations/object_summary.rb +17 -0
  27. data/lib/aws-sdk-s3/customizations/object_version.rb +13 -0
  28. data/lib/aws-sdk-s3/customizations/types/list_object_versions_output.rb +2 -0
  29. data/lib/aws-sdk-s3/customizations/types/permanent_redirect.rb +26 -0
  30. data/lib/aws-sdk-s3/customizations.rb +30 -27
  31. data/lib/aws-sdk-s3/default_executor.rb +103 -0
  32. data/lib/aws-sdk-s3/encryption/client.rb +29 -8
  33. data/lib/aws-sdk-s3/encryption/decrypt_handler.rb +71 -29
  34. data/lib/aws-sdk-s3/encryption/default_cipher_provider.rb +45 -5
  35. data/lib/aws-sdk-s3/encryption/default_key_provider.rb +2 -0
  36. data/lib/aws-sdk-s3/encryption/encrypt_handler.rb +15 -2
  37. data/lib/aws-sdk-s3/encryption/errors.rb +2 -0
  38. data/lib/aws-sdk-s3/encryption/io_auth_decrypter.rb +11 -3
  39. data/lib/aws-sdk-s3/encryption/io_decrypter.rb +11 -3
  40. data/lib/aws-sdk-s3/encryption/io_encrypter.rb +2 -0
  41. data/lib/aws-sdk-s3/encryption/key_provider.rb +2 -0
  42. data/lib/aws-sdk-s3/encryption/kms_cipher_provider.rb +48 -11
  43. data/lib/aws-sdk-s3/encryption/materials.rb +8 -6
  44. data/lib/aws-sdk-s3/encryption/utils.rb +25 -0
  45. data/lib/aws-sdk-s3/encryption.rb +4 -0
  46. data/lib/aws-sdk-s3/encryptionV2/client.rb +645 -0
  47. data/lib/aws-sdk-s3/encryptionV2/decrypt_handler.rb +68 -0
  48. data/lib/aws-sdk-s3/encryptionV2/decryption.rb +205 -0
  49. data/lib/aws-sdk-s3/encryptionV2/default_cipher_provider.rb +187 -0
  50. data/lib/aws-sdk-s3/encryptionV2/default_key_provider.rb +40 -0
  51. data/lib/aws-sdk-s3/encryptionV2/encrypt_handler.rb +67 -0
  52. data/lib/aws-sdk-s3/encryptionV2/errors.rb +37 -0
  53. data/lib/aws-sdk-s3/encryptionV2/io_auth_decrypter.rb +58 -0
  54. data/lib/aws-sdk-s3/encryptionV2/io_decrypter.rb +37 -0
  55. data/lib/aws-sdk-s3/encryptionV2/io_encrypter.rb +75 -0
  56. data/lib/aws-sdk-s3/encryptionV2/key_provider.rb +31 -0
  57. data/lib/aws-sdk-s3/encryptionV2/kms_cipher_provider.rb +181 -0
  58. data/lib/aws-sdk-s3/encryptionV2/materials.rb +60 -0
  59. data/lib/aws-sdk-s3/encryptionV2/utils.rb +108 -0
  60. data/lib/aws-sdk-s3/encryptionV3/client.rb +885 -0
  61. data/lib/aws-sdk-s3/encryptionV3/decrypt_handler.rb +98 -0
  62. data/lib/aws-sdk-s3/encryptionV3/decryption.rb +244 -0
  63. data/lib/aws-sdk-s3/encryptionV3/default_cipher_provider.rb +159 -0
  64. data/lib/aws-sdk-s3/encryptionV3/default_key_provider.rb +35 -0
  65. data/lib/aws-sdk-s3/encryptionV3/encrypt_handler.rb +98 -0
  66. data/lib/aws-sdk-s3/encryptionV3/errors.rb +47 -0
  67. data/lib/aws-sdk-s3/encryptionV3/io_auth_decrypter.rb +60 -0
  68. data/lib/aws-sdk-s3/encryptionV3/io_decrypter.rb +35 -0
  69. data/lib/aws-sdk-s3/encryptionV3/io_encrypter.rb +84 -0
  70. data/lib/aws-sdk-s3/encryptionV3/key_provider.rb +28 -0
  71. data/lib/aws-sdk-s3/encryptionV3/kms_cipher_provider.rb +159 -0
  72. data/lib/aws-sdk-s3/encryptionV3/materials.rb +58 -0
  73. data/lib/aws-sdk-s3/encryptionV3/utils.rb +321 -0
  74. data/lib/aws-sdk-s3/encryption_v2.rb +24 -0
  75. data/lib/aws-sdk-s3/encryption_v3.rb +24 -0
  76. data/lib/aws-sdk-s3/endpoint_parameters.rb +181 -0
  77. data/lib/aws-sdk-s3/endpoint_provider.rb +886 -0
  78. data/lib/aws-sdk-s3/endpoints.rb +1544 -0
  79. data/lib/aws-sdk-s3/errors.rb +181 -1
  80. data/lib/aws-sdk-s3/event_streams.rb +69 -0
  81. data/lib/aws-sdk-s3/express_credentials.rb +55 -0
  82. data/lib/aws-sdk-s3/express_credentials_provider.rb +59 -0
  83. data/lib/aws-sdk-s3/file_downloader.rb +261 -82
  84. data/lib/aws-sdk-s3/file_part.rb +16 -13
  85. data/lib/aws-sdk-s3/file_uploader.rb +37 -22
  86. data/lib/aws-sdk-s3/legacy_signer.rb +19 -26
  87. data/lib/aws-sdk-s3/multipart_download_error.rb +8 -0
  88. data/lib/aws-sdk-s3/multipart_file_uploader.rb +142 -80
  89. data/lib/aws-sdk-s3/multipart_stream_uploader.rb +191 -0
  90. data/lib/aws-sdk-s3/multipart_upload.rb +342 -31
  91. data/lib/aws-sdk-s3/multipart_upload_error.rb +5 -4
  92. data/lib/aws-sdk-s3/multipart_upload_part.rb +387 -47
  93. data/lib/aws-sdk-s3/object.rb +2733 -204
  94. data/lib/aws-sdk-s3/object_acl.rb +112 -25
  95. data/lib/aws-sdk-s3/object_copier.rb +9 -5
  96. data/lib/aws-sdk-s3/object_multipart_copier.rb +50 -23
  97. data/lib/aws-sdk-s3/object_summary.rb +2265 -181
  98. data/lib/aws-sdk-s3/object_version.rb +542 -74
  99. data/lib/aws-sdk-s3/plugins/accelerate.rb +17 -64
  100. data/lib/aws-sdk-s3/plugins/access_grants.rb +178 -0
  101. data/lib/aws-sdk-s3/plugins/arn.rb +70 -0
  102. data/lib/aws-sdk-s3/plugins/bucket_dns.rb +7 -43
  103. data/lib/aws-sdk-s3/plugins/bucket_name_restrictions.rb +20 -3
  104. data/lib/aws-sdk-s3/plugins/checksum_algorithm.rb +31 -0
  105. data/lib/aws-sdk-s3/plugins/dualstack.rb +7 -50
  106. data/lib/aws-sdk-s3/plugins/endpoints.rb +86 -0
  107. data/lib/aws-sdk-s3/plugins/expect_100_continue.rb +5 -4
  108. data/lib/aws-sdk-s3/plugins/express_session_auth.rb +88 -0
  109. data/lib/aws-sdk-s3/plugins/get_bucket_location_fix.rb +3 -1
  110. data/lib/aws-sdk-s3/plugins/http_200_errors.rb +62 -17
  111. data/lib/aws-sdk-s3/plugins/iad_regional_endpoint.rb +44 -0
  112. data/lib/aws-sdk-s3/plugins/location_constraint.rb +5 -1
  113. data/lib/aws-sdk-s3/plugins/md5s.rb +14 -67
  114. data/lib/aws-sdk-s3/plugins/redirects.rb +5 -1
  115. data/lib/aws-sdk-s3/plugins/s3_host_id.rb +2 -0
  116. data/lib/aws-sdk-s3/plugins/s3_signer.rb +67 -93
  117. data/lib/aws-sdk-s3/plugins/sse_cpk.rb +3 -1
  118. data/lib/aws-sdk-s3/plugins/streaming_retry.rb +137 -0
  119. data/lib/aws-sdk-s3/plugins/url_encoded_keys.rb +4 -1
  120. data/lib/aws-sdk-s3/presigned_post.rb +160 -99
  121. data/lib/aws-sdk-s3/presigner.rb +178 -81
  122. data/lib/aws-sdk-s3/resource.rb +164 -15
  123. data/lib/aws-sdk-s3/transfer_manager.rb +303 -0
  124. data/lib/aws-sdk-s3/types.rb +15981 -4168
  125. data/lib/aws-sdk-s3/waiters.rb +67 -1
  126. data/lib/aws-sdk-s3.rb +46 -31
  127. data/sig/bucket.rbs +231 -0
  128. data/sig/bucket_acl.rbs +78 -0
  129. data/sig/bucket_cors.rbs +69 -0
  130. data/sig/bucket_lifecycle.rbs +88 -0
  131. data/sig/bucket_lifecycle_configuration.rbs +115 -0
  132. data/sig/bucket_logging.rbs +76 -0
  133. data/sig/bucket_notification.rbs +114 -0
  134. data/sig/bucket_policy.rbs +59 -0
  135. data/sig/bucket_request_payment.rbs +54 -0
  136. data/sig/bucket_tagging.rbs +65 -0
  137. data/sig/bucket_versioning.rbs +77 -0
  138. data/sig/bucket_website.rbs +93 -0
  139. data/sig/client.rbs +2612 -0
  140. data/sig/customizations/bucket.rbs +19 -0
  141. data/sig/customizations/object.rbs +38 -0
  142. data/sig/customizations/object_summary.rbs +35 -0
  143. data/sig/errors.rbs +44 -0
  144. data/sig/multipart_upload.rbs +120 -0
  145. data/sig/multipart_upload_part.rbs +109 -0
  146. data/sig/object.rbs +464 -0
  147. data/sig/object_acl.rbs +86 -0
  148. data/sig/object_summary.rbs +347 -0
  149. data/sig/object_version.rbs +143 -0
  150. data/sig/resource.rbs +141 -0
  151. data/sig/types.rbs +2899 -0
  152. data/sig/waiters.rbs +95 -0
  153. metadata +97 -14
@@ -1,49 +1,46 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'pathname'
2
- require 'thread'
3
4
  require 'set'
4
5
 
5
6
  module Aws
6
7
  module S3
7
8
  # @api private
8
9
  class MultipartFileUploader
9
-
10
10
  MIN_PART_SIZE = 5 * 1024 * 1024 # 5MB
11
-
12
- FILE_TOO_SMALL = "unable to multipart upload files smaller than 5MB"
13
-
14
11
  MAX_PARTS = 10_000
15
-
16
- THREAD_COUNT = 10
17
-
18
- # @api private
19
- CREATE_OPTIONS =
20
- Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
21
-
22
- # @api private
23
- UPLOAD_PART_OPTIONS =
24
- Set.new(Client.api.operation(:upload_part).input.shape.member_names)
12
+ CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
13
+ COMPLETE_OPTIONS = Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names)
14
+ UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names)
15
+ CHECKSUM_KEYS = Set.new(
16
+ Client.api.operation(:upload_part).input.shape.members.map do |n, s|
17
+ n if s.location == 'header' && s.location_name.start_with?('x-amz-checksum-')
18
+ end.compact
19
+ )
25
20
 
26
21
  # @option options [Client] :client
27
22
  def initialize(options = {})
28
23
  @client = options[:client] || Client.new
29
- @thread_count = options[:thread_count] || THREAD_COUNT
24
+ @executor = options[:executor]
30
25
  end
31
26
 
32
27
  # @return [Client]
33
28
  attr_reader :client
34
29
 
35
- # @param [String,Pathname,File,Tempfile] source
36
- # @option options [required,String] :bucket
37
- # @option options [required,String] :key
38
- # @return [void]
30
+ # @param [String, Pathname, File, Tempfile] source The file to upload.
31
+ # @option options [required, String] :bucket The bucket to upload to.
32
+ # @option options [required, String] :key The key for the object.
33
+ # @option options [Proc] :progress_callback
34
+ # A Proc that will be called when each chunk of the upload is sent.
35
+ # It will be invoked with [bytes_read], [total_sizes]
36
+ # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse
39
37
  def upload(source, options = {})
40
- if File.size(source) < MIN_PART_SIZE
41
- raise ArgumentError, FILE_TOO_SMALL
42
- else
43
- upload_id = initiate_upload(options)
44
- parts = upload_parts(upload_id, source, options)
45
- complete_upload(upload_id, parts, options)
46
- end
38
+ file_size = File.size(source)
39
+ raise ArgumentError, 'unable to multipart upload files smaller than 5MB' if file_size < MIN_PART_SIZE
40
+
41
+ upload_id = initiate_upload(options)
42
+ parts = upload_parts(upload_id, source, file_size, options)
43
+ complete_upload(upload_id, parts, file_size, options)
47
44
  end
48
45
 
49
46
  private
@@ -52,18 +49,21 @@ module Aws
52
49
  @client.create_multipart_upload(create_opts(options)).upload_id
53
50
  end
54
51
 
55
- def complete_upload(upload_id, parts, options)
52
+ def complete_upload(upload_id, parts, file_size, options)
56
53
  @client.complete_multipart_upload(
57
- bucket: options[:bucket],
58
- key: options[:key],
54
+ **complete_opts(options),
59
55
  upload_id: upload_id,
60
- multipart_upload: { parts: parts })
56
+ multipart_upload: { parts: parts },
57
+ mpu_object_size: file_size
58
+ )
59
+ rescue StandardError => e
60
+ abort_upload(upload_id, options, [e])
61
61
  end
62
62
 
63
- def upload_parts(upload_id, source, options)
64
- pending = PartList.new(compute_parts(upload_id, source, options))
63
+ def upload_parts(upload_id, source, file_size, options)
65
64
  completed = PartList.new
66
- errors = upload_in_threads(pending, completed)
65
+ pending = PartList.new(compute_parts(upload_id, source, file_size, options))
66
+ errors = upload_with_executor(pending, completed, options)
67
67
  if errors.empty?
68
68
  completed.to_a.sort_by { |part| part[:part_number] }
69
69
  else
@@ -72,81 +72,111 @@ module Aws
72
72
  end
73
73
 
74
74
  def abort_upload(upload_id, options, errors)
75
- @client.abort_multipart_upload(
76
- bucket: options[:bucket],
77
- key: options[:key],
78
- upload_id: upload_id
79
- )
80
- msg = "multipart upload failed: #{errors.map(&:message).join("; ")}"
75
+ @client.abort_multipart_upload(bucket: options[:bucket], key: options[:key], upload_id: upload_id)
76
+ msg = "multipart upload failed: #{errors.map(&:message).join('; ')}"
81
77
  raise MultipartUploadError.new(msg, errors)
82
- rescue MultipartUploadError => error
83
- raise error
84
- rescue => error
85
- msg = "failed to abort multipart upload: #{error.message}"
86
- raise MultipartUploadError.new(msg, errors + [error])
78
+ rescue MultipartUploadError => e
79
+ raise e
80
+ rescue StandardError => e
81
+ msg = "failed to abort multipart upload: #{e.message}. " \
82
+ "Multipart upload failed: #{errors.map(&:message).join('; ')}"
83
+ raise MultipartUploadError.new(msg, errors + [e])
87
84
  end
88
85
 
89
- def compute_parts(upload_id, source, options)
90
- size = File.size(source)
91
- default_part_size = compute_default_part_size(size)
86
+ def compute_parts(upload_id, source, file_size, options)
87
+ default_part_size = compute_default_part_size(file_size)
92
88
  offset = 0
93
89
  part_number = 1
94
90
  parts = []
95
- while offset < size
96
- parts << upload_part_opts(options).merge({
91
+ while offset < file_size
92
+ parts << upload_part_opts(options).merge(
97
93
  upload_id: upload_id,
98
94
  part_number: part_number,
99
95
  body: FilePart.new(
100
96
  source: source,
101
97
  offset: offset,
102
- size: part_size(size, default_part_size, offset)
98
+ size: part_size(file_size, default_part_size, offset)
103
99
  )
104
- })
100
+ )
105
101
  part_number += 1
106
102
  offset += default_part_size
107
103
  end
108
104
  parts
109
105
  end
110
106
 
107
+ def checksum_key?(key)
108
+ CHECKSUM_KEYS.include?(key)
109
+ end
110
+
111
+ def has_checksum_key?(keys)
112
+ keys.any? { |key| checksum_key?(key) }
113
+ end
114
+
115
+ def checksum_not_required?(options)
116
+ @client.config.request_checksum_calculation == 'when_required' && !options[:checksum_algorithm]
117
+ end
118
+
111
119
  def create_opts(options)
112
- CREATE_OPTIONS.inject({}) do |hash, key|
113
- hash[key] = options[key] if options.key?(key)
114
- hash
120
+ opts = {}
121
+ unless checksum_not_required?(options)
122
+ opts[:checksum_algorithm] = Aws::Plugins::ChecksumAlgorithm::DEFAULT_CHECKSUM
115
123
  end
124
+ opts[:checksum_type] = 'FULL_OBJECT' if has_checksum_key?(options.keys)
125
+ CREATE_OPTIONS.each_with_object(opts) { |k, h| h[k] = options[k] if options.key?(k) }
126
+ end
127
+
128
+ def complete_opts(options)
129
+ opts = {}
130
+ opts[:checksum_type] = 'FULL_OBJECT' if has_checksum_key?(options.keys)
131
+ COMPLETE_OPTIONS.each_with_object(opts) { |k, h| h[k] = options[k] if options.key?(k) }
116
132
  end
117
133
 
118
134
  def upload_part_opts(options)
119
- UPLOAD_PART_OPTIONS.inject({}) do |hash, key|
120
- hash[key] = options[key] if options.key?(key)
121
- hash
135
+ UPLOAD_PART_OPTIONS.each_with_object({}) do |key, hash|
136
+ # don't pass through checksum calculations
137
+ hash[key] = options[key] if options.key?(key) && !checksum_key?(key)
122
138
  end
123
139
  end
124
140
 
125
- def upload_in_threads(pending, completed)
126
- threads = []
127
- @thread_count.times do
128
- thread = Thread.new do
129
- begin
130
- while part = pending.shift
131
- resp = @client.upload_part(part)
132
- part[:body].close
133
- completed.push(etag: resp.etag, part_number: part[:part_number])
134
- end
135
- nil
136
- rescue => error
137
- # keep other threads from uploading other parts
138
- pending.clear!
139
- error
140
- end
141
+ def upload_with_executor(pending, completed, options)
142
+ upload_attempts = 0
143
+ completion_queue = Queue.new
144
+ abort_upload = false
145
+ errors = []
146
+ progress = MultipartProgress.new(pending, options[:progress_callback])
147
+
148
+ while (part = pending.shift)
149
+ break if abort_upload
150
+
151
+ upload_attempts += 1
152
+ @executor.post(part) do |p|
153
+ update_progress(progress, p)
154
+ resp = @client.upload_part(p)
155
+ p[:body].close
156
+ completed_part = { etag: resp.etag, part_number: p[:part_number] }
157
+ apply_part_checksum(resp, completed_part)
158
+ completed.push(completed_part)
159
+ rescue StandardError => e
160
+ abort_upload = true
161
+ errors << e
162
+ ensure
163
+ completion_queue << :done
141
164
  end
142
- thread.abort_on_exception = true
143
- threads << thread
144
165
  end
145
- threads.map(&:value).compact
166
+
167
+ upload_attempts.times { completion_queue.pop }
168
+ errors
169
+ end
170
+
171
+ def apply_part_checksum(resp, part)
172
+ return unless (checksum = resp.context.params[:checksum_algorithm])
173
+
174
+ k = :"checksum_#{checksum.downcase}"
175
+ part[k] = resp.send(k)
146
176
  end
147
177
 
148
- def compute_default_part_size(source_size)
149
- [(source_size.to_f / MAX_PARTS).ceil, MIN_PART_SIZE].max.to_i
178
+ def compute_default_part_size(file_size)
179
+ [(file_size.to_f / MAX_PARTS).ceil, MIN_PART_SIZE].max.to_i
150
180
  end
151
181
 
152
182
  def part_size(total_size, part_size, offset)
@@ -157,9 +187,17 @@ module Aws
157
187
  end
158
188
  end
159
189
 
190
+ def update_progress(progress, part)
191
+ return unless progress.progress_callback
192
+
193
+ part[:on_chunk_sent] =
194
+ proc do |_chunk, bytes, _total|
195
+ progress.call(part[:part_number], bytes)
196
+ end
197
+ end
198
+
160
199
  # @api private
161
200
  class PartList
162
-
163
201
  def initialize(parts = [])
164
202
  @parts = parts
165
203
  @mutex = Mutex.new
@@ -177,10 +215,34 @@ module Aws
177
215
  @mutex.synchronize { @parts.clear }
178
216
  end
179
217
 
218
+ def size
219
+ @mutex.synchronize { @parts.size }
220
+ end
221
+
222
+ def part_sizes
223
+ @mutex.synchronize { @parts.map { |p| p[:body].size } }
224
+ end
225
+
180
226
  def to_a
181
227
  @mutex.synchronize { @parts.dup }
182
228
  end
229
+ end
230
+
231
+ # @api private
232
+ class MultipartProgress
233
+ def initialize(parts, progress_callback)
234
+ @bytes_sent = Array.new(parts.size, 0)
235
+ @total_sizes = parts.part_sizes
236
+ @progress_callback = progress_callback
237
+ end
183
238
 
239
+ attr_reader :progress_callback
240
+
241
+ def call(part_number, bytes_read)
242
+ # part numbers start at 1
243
+ @bytes_sent[part_number - 1] = bytes_read
244
+ @progress_callback.call(@bytes_sent, @total_sizes)
245
+ end
184
246
  end
185
247
  end
186
248
  end
@@ -0,0 +1,191 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'thread'
4
+ require 'set'
5
+ require 'tempfile'
6
+ require 'stringio'
7
+
8
+ module Aws
9
+ module S3
10
+ # @api private
11
+ class MultipartStreamUploader
12
+
13
+ DEFAULT_PART_SIZE = 5 * 1024 * 1024 # 5MB
14
+ CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
15
+ UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names)
16
+ COMPLETE_UPLOAD_OPTIONS = Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names)
17
+
18
+ # @option options [Client] :client
19
+ def initialize(options = {})
20
+ @client = options[:client] || Client.new
21
+ @executor = options[:executor]
22
+ @tempfile = options[:tempfile]
23
+ @part_size = options[:part_size] || DEFAULT_PART_SIZE
24
+ end
25
+
26
+ # @return [Client]
27
+ attr_reader :client
28
+
29
+ # @option options [required,String] :bucket
30
+ # @option options [required,String] :key
31
+ # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse
32
+ def upload(options = {}, &block)
33
+ Aws::Plugins::UserAgent.metric('S3_TRANSFER') do
34
+ upload_id = initiate_upload(options)
35
+ parts = upload_parts(upload_id, options, &block)
36
+ complete_upload(upload_id, parts, options)
37
+ end
38
+ end
39
+
40
+ private
41
+
42
+ def initiate_upload(options)
43
+ @client.create_multipart_upload(create_opts(options)).upload_id
44
+ end
45
+
46
+ def complete_upload(upload_id, parts, options)
47
+ @client.complete_multipart_upload(
48
+ **complete_opts(options).merge(upload_id: upload_id, multipart_upload: { parts: parts })
49
+ )
50
+ rescue StandardError => e
51
+ abort_upload(upload_id, options, [e])
52
+ end
53
+
54
+ def upload_parts(upload_id, options, &block)
55
+ completed_parts = Queue.new
56
+ errors = []
57
+
58
+ begin
59
+ IO.pipe do |read_pipe, write_pipe|
60
+ upload_thread = Thread.new do
61
+ upload_with_executor(
62
+ read_pipe,
63
+ completed_parts,
64
+ errors,
65
+ upload_part_opts(options).merge(upload_id: upload_id)
66
+ )
67
+ end
68
+
69
+ block.call(write_pipe)
70
+ ensure
71
+ # Ensure the pipe is closed to avoid https://github.com/jruby/jruby/issues/6111
72
+ write_pipe.close
73
+ upload_thread.join
74
+ end
75
+ rescue StandardError => e
76
+ errors << e
77
+ end
78
+ return ordered_parts(completed_parts) if errors.empty?
79
+
80
+ abort_upload(upload_id, options, errors)
81
+ end
82
+
83
+ def abort_upload(upload_id, options, errors)
84
+ @client.abort_multipart_upload(bucket: options[:bucket], key: options[:key], upload_id: upload_id)
85
+ msg = "multipart upload failed: #{errors.map(&:message).join('; ')}"
86
+ raise MultipartUploadError.new(msg, errors)
87
+ rescue MultipartUploadError => e
88
+ raise e
89
+ rescue StandardError => e
90
+ msg = "failed to abort multipart upload: #{e.message}. "\
91
+ "Multipart upload failed: #{errors.map(&:message).join('; ')}"
92
+ raise MultipartUploadError.new(msg, errors + [e])
93
+ end
94
+
95
+ def create_opts(options)
96
+ CREATE_OPTIONS.each_with_object({}) do |key, hash|
97
+ hash[key] = options[key] if options.key?(key)
98
+ end
99
+ end
100
+
101
+ def upload_part_opts(options)
102
+ UPLOAD_PART_OPTIONS.each_with_object({}) do |key, hash|
103
+ hash[key] = options[key] if options.key?(key)
104
+ end
105
+ end
106
+
107
+ def complete_opts(options)
108
+ COMPLETE_UPLOAD_OPTIONS.each_with_object({}) do |key, hash|
109
+ hash[key] = options[key] if options.key?(key)
110
+ end
111
+ end
112
+
113
+ def read_to_part_body(read_pipe)
114
+ return if read_pipe.closed?
115
+
116
+ temp_io = @tempfile ? Tempfile.new('aws-sdk-s3-upload_stream') : StringIO.new(String.new)
117
+ temp_io.binmode
118
+ bytes_copied = IO.copy_stream(read_pipe, temp_io, @part_size)
119
+ temp_io.rewind
120
+ if bytes_copied.zero?
121
+ if temp_io.is_a?(Tempfile)
122
+ temp_io.close
123
+ temp_io.unlink
124
+ end
125
+ nil
126
+ else
127
+ temp_io
128
+ end
129
+ end
130
+
131
+ def upload_with_executor(read_pipe, completed, errors, options)
132
+ completion_queue = Queue.new
133
+ queued_parts = 0
134
+ part_number = 0
135
+ mutex = Mutex.new
136
+ loop do
137
+ part_body, current_part_num = mutex.synchronize do
138
+ [read_to_part_body(read_pipe), part_number += 1]
139
+ end
140
+ break unless part_body || current_part_num == 1
141
+
142
+ queued_parts += 1
143
+ @executor.post(part_body, current_part_num, options) do |body, num, opts|
144
+ part = opts.merge(body: body, part_number: num)
145
+ resp = @client.upload_part(part)
146
+ completed_part = create_completed_part(resp, part)
147
+ completed.push(completed_part)
148
+ rescue StandardError => e
149
+ mutex.synchronize do
150
+ errors.push(e)
151
+ read_pipe.close_read unless read_pipe.closed?
152
+ end
153
+ ensure
154
+ clear_body(body)
155
+ completion_queue << :done
156
+ end
157
+ end
158
+ queued_parts.times { completion_queue.pop }
159
+ end
160
+
161
+ def create_completed_part(resp, part)
162
+ completed_part = { etag: resp.etag, part_number: part[:part_number] }
163
+ return completed_part unless part[:checksum_algorithm]
164
+
165
+ # get the requested checksum from the response
166
+ k = "checksum_#{part[:checksum_algorithm].downcase}".to_sym
167
+ completed_part[k] = resp[k]
168
+ completed_part
169
+ end
170
+
171
+ def ordered_parts(parts)
172
+ sorted = []
173
+ until parts.empty?
174
+ part = parts.pop
175
+ index = sorted.bsearch_index { |p| p[:part_number] >= part[:part_number] } || sorted.size
176
+ sorted.insert(index, part)
177
+ end
178
+ sorted
179
+ end
180
+
181
+ def clear_body(body)
182
+ if body.is_a?(Tempfile)
183
+ body.close
184
+ body.unlink
185
+ elsif body.is_a?(StringIO)
186
+ body.string.clear
187
+ end
188
+ end
189
+ end
190
+ end
191
+ end