aws-sdk-s3 1.192.0 → 1.201.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,27 +7,11 @@ module Aws
   module S3
     # @api private
     class MultipartFileUploader
-
       MIN_PART_SIZE = 5 * 1024 * 1024 # 5MB
-
-      FILE_TOO_SMALL = "unable to multipart upload files smaller than 5MB"
-
       MAX_PARTS = 10_000
-
-      THREAD_COUNT = 10
-
-      CREATE_OPTIONS = Set.new(
-        Client.api.operation(:create_multipart_upload).input.shape.member_names
-      )
-
-      COMPLETE_OPTIONS = Set.new(
-        Client.api.operation(:complete_multipart_upload).input.shape.member_names
-      )
-
-      UPLOAD_PART_OPTIONS = Set.new(
-        Client.api.operation(:upload_part).input.shape.member_names
-      )
-
+      CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
+      COMPLETE_OPTIONS = Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names)
+      UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names)
       CHECKSUM_KEYS = Set.new(
         Client.api.operation(:upload_part).input.shape.members.map do |n, s|
           n if s.location == 'header' && s.location_name.start_with?('x-amz-checksum-')
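The three option sets above only change shape, not content: each is still the set of input member names for the corresponding S3 operation, and the uploader uses them to slice the caller's options hash. A minimal sketch of that filtering idiom, with a hand-written stand-in for CREATE_OPTIONS (the member names listed here are illustrative):

    require 'set'

    # Stand-in for CREATE_OPTIONS; the real set is derived from the S3 API model.
    create_options = Set.new(%i[bucket key acl content_type])

    options = { bucket: 'my-bucket', key: 'big.bin', content_type: 'text/plain', part_size: 123 }

    # The same each_with_object slice used by create_opts/complete_opts later in this diff.
    create_opts = create_options.each_with_object({}) { |k, h| h[k] = options[k] if options.key?(k) }
    # => {:bucket=>"my-bucket", :key=>"big.bin", :content_type=>"text/plain"}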
@@ -35,10 +19,9 @@ module Aws
       )

       # @option options [Client] :client
-      # @option options [Integer] :thread_count (THREAD_COUNT)
       def initialize(options = {})
         @client = options[:client] || Client.new
-        @thread_count = options[:thread_count] || THREAD_COUNT
+        @executor = options[:executor]
       end

       # @return [Client]
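The :thread_count option is gone from the constructor; concurrency is now delegated to an injected :executor. Nothing in this diff pins down the executor class; the only contract visible here is an object that responds to #post(*args) and runs the given block with those arguments. A hand-rolled stand-in could look like the sketch below (the class name and one-thread-per-task strategy are illustrative, not part of the gem):

    # Illustrative executor satisfying the #post contract used by the uploaders in this diff.
    class SimpleThreadExecutor
      def initialize
        @threads = []
      end

      # Run each posted task on its own thread, handing the arguments to the block.
      def post(*args, &block)
        @threads << Thread.new(*args, &block)
      end

      # Not required by the uploaders (they wait on their own completion queue),
      # but handy when reusing the executor elsewhere.
      def shutdown
        @threads.each(&:join)
      end
    end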
@@ -52,13 +35,12 @@ module Aws
       #   It will be invoked with [bytes_read], [total_sizes]
       # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse
       def upload(source, options = {})
-        if File.size(source) < MIN_PART_SIZE
-          raise ArgumentError, FILE_TOO_SMALL
-        else
-          upload_id = initiate_upload(options)
-          parts = upload_parts(upload_id, source, options)
-          complete_upload(upload_id, parts, options)
-        end
+        file_size = File.size(source)
+        raise ArgumentError, 'unable to multipart upload files smaller than 5MB' if file_size < MIN_PART_SIZE
+
+        upload_id = initiate_upload(options)
+        parts = upload_parts(upload_id, source, file_size, options)
+        complete_upload(upload_id, parts, file_size, options)
       end

       private
@@ -67,19 +49,21 @@ module Aws
         @client.create_multipart_upload(create_opts(options)).upload_id
       end

-      def complete_upload(upload_id, parts, options)
+      def complete_upload(upload_id, parts, file_size, options)
         @client.complete_multipart_upload(
-          **complete_opts(options).merge(
-            upload_id: upload_id,
-            multipart_upload: { parts: parts }
-          )
+          **complete_opts(options),
+          upload_id: upload_id,
+          multipart_upload: { parts: parts },
+          mpu_object_size: file_size
         )
+      rescue StandardError => e
+        abort_upload(upload_id, options, [e])
       end

-      def upload_parts(upload_id, source, options)
-        pending = PartList.new(compute_parts(upload_id, source, options))
+      def upload_parts(upload_id, source, file_size, options)
         completed = PartList.new
-        errors = upload_in_threads(pending, completed, options)
+        pending = PartList.new(compute_parts(upload_id, source, file_size, options))
+        errors = upload_with_executor(pending, completed, options)
         if errors.empty?
           completed.to_a.sort_by { |part| part[:part_number] }
         else
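Two behavior changes land in complete_upload: the total object size is now forwarded as mpu_object_size, and a failure during completion now aborts the multipart upload instead of leaving it dangling. At the client level, the completion call now looks roughly like this (bucket, key, upload id, and sizes are placeholders; the parameter name comes straight from the diff above):

    require 'aws-sdk-s3'

    client = Aws::S3::Client.new
    client.complete_multipart_upload(
      bucket: 'my-bucket',                                           # placeholder
      key: 'big.bin',                                                # placeholder
      upload_id: 'EXAMPLE_UPLOAD_ID',                                # from create_multipart_upload
      multipart_upload: { parts: [{ etag: '"etag-1"', part_number: 1 }] },
      mpu_object_size: 52_428_800                                    # total object size in bytes
    )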
@@ -88,35 +72,30 @@ module Aws
       end

       def abort_upload(upload_id, options, errors)
-        @client.abort_multipart_upload(
-          bucket: options[:bucket],
-          key: options[:key],
-          upload_id: upload_id
-        )
+        @client.abort_multipart_upload(bucket: options[:bucket], key: options[:key], upload_id: upload_id)
         msg = "multipart upload failed: #{errors.map(&:message).join('; ')}"
         raise MultipartUploadError.new(msg, errors)
-      rescue MultipartUploadError => error
-        raise error
-      rescue => error
-        msg = "failed to abort multipart upload: #{error.message}. "\
-          "Multipart upload failed: #{errors.map(&:message).join('; ')}"
-        raise MultipartUploadError.new(msg, errors + [error])
+      rescue MultipartUploadError => e
+        raise e
+      rescue StandardError => e
+        msg = "failed to abort multipart upload: #{e.message}. " \
+              "Multipart upload failed: #{errors.map(&:message).join('; ')}"
+        raise MultipartUploadError.new(msg, errors + [e])
       end

-      def compute_parts(upload_id, source, options)
-        size = File.size(source)
-        default_part_size = compute_default_part_size(size)
+      def compute_parts(upload_id, source, file_size, options)
+        default_part_size = compute_default_part_size(file_size)
         offset = 0
         part_number = 1
         parts = []
-        while offset < size
+        while offset < file_size
           parts << upload_part_opts(options).merge(
             upload_id: upload_id,
             part_number: part_number,
             body: FilePart.new(
               source: source,
               offset: offset,
-              size: part_size(size, default_part_size, offset)
+              size: part_size(file_size, default_part_size, offset)
             )
           )
           part_number += 1
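The part-size arithmetic is unchanged apart from reusing the file size computed once in upload: the default part size is the larger of 5 MB and file_size / 10,000 rounded up, so a 100 MB file is split into twenty 5 MB parts while a 100 GB file gets parts of just over 10 MB. A quick standalone check of that formula:

    MIN_PART_SIZE = 5 * 1024 * 1024 # 5MB
    MAX_PARTS = 10_000

    def default_part_size(file_size)
      [(file_size.to_f / MAX_PARTS).ceil, MIN_PART_SIZE].max.to_i
    end

    default_part_size(100 * 1024 * 1024)        # => 5_242_880  (5 MB, 20 parts)
    default_part_size(100 * 1024 * 1024 * 1024) # => 10_737_419 (~10.2 MB, ~10,000 parts)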
@@ -133,74 +112,71 @@ module Aws
         keys.any? { |key| checksum_key?(key) }
       end

+      def checksum_not_required?(options)
+        @client.config.request_checksum_calculation == 'when_required' && !options[:checksum_algorithm]
+      end
+
       def create_opts(options)
-        opts = { checksum_algorithm: Aws::Plugins::ChecksumAlgorithm::DEFAULT_CHECKSUM }
-        opts[:checksum_type] = 'FULL_OBJECT' if has_checksum_key?(options.keys)
-        CREATE_OPTIONS.inject(opts) do |hash, key|
-          hash[key] = options[key] if options.key?(key)
-          hash
+        opts = {}
+        unless checksum_not_required?(options)
+          opts[:checksum_algorithm] = Aws::Plugins::ChecksumAlgorithm::DEFAULT_CHECKSUM
         end
+        opts[:checksum_type] = 'FULL_OBJECT' if has_checksum_key?(options.keys)
+        CREATE_OPTIONS.each_with_object(opts) { |k, h| h[k] = options[k] if options.key?(k) }
       end

       def complete_opts(options)
         opts = {}
         opts[:checksum_type] = 'FULL_OBJECT' if has_checksum_key?(options.keys)
-        COMPLETE_OPTIONS.inject(opts) do |hash, key|
-          hash[key] = options[key] if options.key?(key)
-          hash
-        end
+        COMPLETE_OPTIONS.each_with_object(opts) { |k, h| h[k] = options[k] if options.key?(k) }
       end

       def upload_part_opts(options)
-        UPLOAD_PART_OPTIONS.inject({}) do |hash, key|
-          if options.key?(key)
-            # don't pass through checksum calculations
-            hash[key] = options[key] unless checksum_key?(key)
+        UPLOAD_PART_OPTIONS.each_with_object({}) do |key, hash|
+          # don't pass through checksum calculations
+          hash[key] = options[key] if options.key?(key) && !checksum_key?(key)
+        end
+      end
+
+      def upload_with_executor(pending, completed, options)
+        upload_attempts = 0
+        completion_queue = Queue.new
+        abort_upload = false
+        errors = []
+        progress = MultipartProgress.new(pending, options[:progress_callback])
+
+        while (part = pending.shift)
+          break if abort_upload
+
+          upload_attempts += 1
+          @executor.post(part) do |p|
+            update_progress(progress, p)
+            resp = @client.upload_part(p)
+            p[:body].close
+            completed_part = { etag: resp.etag, part_number: p[:part_number] }
+            apply_part_checksum(resp, completed_part)
+            completed.push(completed_part)
+          rescue StandardError => e
+            abort_upload = true
+            errors << e
+          ensure
+            completion_queue << :done
           end
-          hash
         end
+
+        upload_attempts.times { completion_queue.pop }
+        errors
       end

-      def upload_in_threads(pending, completed, options)
-        threads = []
-        if (callback = options[:progress_callback])
-          progress = MultipartProgress.new(pending, callback)
-        end
-        options.fetch(:thread_count, @thread_count).times do
-          thread = Thread.new do
-            begin
-              while part = pending.shift
-                if progress
-                  part[:on_chunk_sent] =
-                    proc do |_chunk, bytes, _total|
-                      progress.call(part[:part_number], bytes)
-                    end
-                end
-                resp = @client.upload_part(part)
-                part[:body].close
-                completed_part = {
-                  etag: resp.etag,
-                  part_number: part[:part_number]
-                }
-                algorithm = resp.context.params[:checksum_algorithm]
-                k = "checksum_#{algorithm.downcase}".to_sym
-                completed_part[k] = resp.send(k)
-                completed.push(completed_part)
-              end
-              nil
-            rescue => error
-              # keep other threads from uploading other parts
-              pending.clear!
-              error
-            end
-          end
-          threads << thread
-        end
-        threads.map(&:value).compact
+      def apply_part_checksum(resp, part)
+        return unless (checksum = resp.context.params[:checksum_algorithm])
+
+        k = :"checksum_#{checksum.downcase}"
+        part[k] = resp.send(k)
       end

-      def compute_default_part_size(source_size)
-        [(source_size.to_f / MAX_PARTS).ceil, MIN_PART_SIZE].max.to_i
+      def compute_default_part_size(file_size)
+        [(file_size.to_f / MAX_PARTS).ceil, MIN_PART_SIZE].max.to_i
       end

       def part_size(total_size, part_size, offset)
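upload_with_executor replaces the fixed pool of threads with a fan-out/fan-in pattern: every part handed to the executor pushes :done onto a queue when it finishes, success or failure, and the method blocks by popping the queue once per posted part before returning the collected errors. The same pattern in isolation, with a toy executor and a simulated failing part:

    # Toy executor: one thread per posted task (stand-in for the injected :executor).
    executor = Object.new
    def executor.post(*args, &block)
      Thread.new(*args, &block)
    end

    completion_queue = Queue.new
    errors = []
    posted = 0

    (1..8).each do |part_number|
      posted += 1
      executor.post(part_number) do |n|
        raise "boom on part #{n}" if n == 3 # stand-in for a failed upload_part call
      rescue StandardError => e
        errors << e
      ensure
        completion_queue << :done           # always signal, success or failure
      end
    end

    posted.times { completion_queue.pop }   # fan-in: wait for every posted part
    errors.map(&:message)                   # => ["boom on part 3"]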
@@ -211,9 +187,17 @@ module Aws
         end
       end

+      def update_progress(progress, part)
+        return unless progress.progress_callback
+
+        part[:on_chunk_sent] =
+          proc do |_chunk, bytes, _total|
+            progress.call(part[:part_number], bytes)
+          end
+      end
+
       # @api private
       class PartList
-
         def initialize(parts = [])
           @parts = parts
           @mutex = Mutex.new
@@ -242,7 +226,6 @@ module Aws
         def to_a
           @mutex.synchronize { @parts.dup }
         end
-
       end

       # @api private
@@ -253,6 +236,8 @@ module Aws
           @progress_callback = progress_callback
         end

+        attr_reader :progress_callback
+
        def call(part_number, bytes_read)
          # part numbers start at 1
          @bytes_sent[part_number - 1] = bytes_read
@@ -261,4 +246,4 @@ module Aws
       end
     end
   end
-end
+end
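Progress reporting is now routed through the new update_progress helper and the attr_reader added to MultipartProgress, but the callback shape is unchanged: it still receives the per-part bytes-sent array and the array of part sizes, as the doc comment earlier in the diff notes. For example, through the high-level interface (bucket, key, and path are placeholders):

    require 'aws-sdk-s3'

    obj = Aws::S3::Resource.new.bucket('my-bucket').object('big.bin')

    progress = proc do |bytes, totals|
      puts format('uploaded %.1f%%', 100.0 * bytes.sum / totals.sum)
    end

    obj.upload_file('/tmp/big.bin', progress_callback: progress)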
@@ -9,33 +9,18 @@ module Aws
   module S3
     # @api private
     class MultipartStreamUploader
-      # api private
-      PART_SIZE = 5 * 1024 * 1024 # 5MB

-      # api private
-      THREAD_COUNT = 10
-
-      # api private
-      TEMPFILE_PREIX = 'aws-sdk-s3-upload_stream'.freeze
-
-      # @api private
-      CREATE_OPTIONS =
-        Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
-
-      # @api private
-      UPLOAD_PART_OPTIONS =
-        Set.new(Client.api.operation(:upload_part).input.shape.member_names)
-
-      # @api private
-      COMPLETE_UPLOAD_OPTIONS =
-        Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names)
+      DEFAULT_PART_SIZE = 5 * 1024 * 1024 # 5MB
+      CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
+      UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names)
+      COMPLETE_UPLOAD_OPTIONS = Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names)

       # @option options [Client] :client
       def initialize(options = {})
         @client = options[:client] || Client.new
+        @executor = options[:executor]
         @tempfile = options[:tempfile]
-        @part_size = options[:part_size] || PART_SIZE
-        @thread_count = options[:thread_count] || THREAD_COUNT
+        @part_size = options[:part_size] || DEFAULT_PART_SIZE
       end

       # @return [Client]
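The stream uploader gets the same treatment: :thread_count is dropped in favor of an injected :executor, and the PART_SIZE and TEMPFILE_PREIX constants collapse into DEFAULT_PART_SIZE and an inline tempfile prefix. The :tempfile and :part_size options still flow through, as the constructor above shows; for example (bucket, key, and values are illustrative):

    require 'aws-sdk-s3'

    obj = Aws::S3::Resource.new.bucket('my-bucket').object('streamed.bin')

    # Buffer parts on disk instead of in memory, 10 MB at a time.
    obj.upload_stream(tempfile: true, part_size: 10 * 1024 * 1024) do |write_stream|
      IO.copy_stream('/tmp/source.bin', write_stream)
    end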
@@ -43,7 +28,6 @@ module Aws

       # @option options [required,String] :bucket
       # @option options [required,String] :key
-      # @option options [Integer] :thread_count (THREAD_COUNT)
       # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse
       def upload(options = {}, &block)
         Aws::Plugins::UserAgent.metric('S3_TRANSFER') do
@@ -61,86 +45,80 @@ module Aws

       def complete_upload(upload_id, parts, options)
         @client.complete_multipart_upload(
-          **complete_opts(options).merge(
-            upload_id: upload_id,
-            multipart_upload: { parts: parts }
-          )
+          **complete_opts(options).merge(upload_id: upload_id, multipart_upload: { parts: parts })
         )
+      rescue StandardError => e
+        abort_upload(upload_id, options, [e])
       end

       def upload_parts(upload_id, options, &block)
-        completed = Queue.new
-        thread_errors = []
-        errors = begin
+        completed_parts = Queue.new
+        errors = []
+
+        begin
           IO.pipe do |read_pipe, write_pipe|
-            threads = upload_in_threads(
-              read_pipe, completed,
-              upload_part_opts(options).merge(upload_id: upload_id),
-              thread_errors)
-            begin
-              block.call(write_pipe)
-            ensure
-              # Ensure the pipe is closed to avoid https://github.com/jruby/jruby/issues/6111
-              write_pipe.close
+            upload_thread = Thread.new do
+              upload_with_executor(
+                read_pipe,
+                completed_parts,
+                errors,
+                upload_part_opts(options).merge(upload_id: upload_id)
+              )
             end
-            threads.map(&:value).compact
+
+            block.call(write_pipe)
+          ensure
+            # Ensure the pipe is closed to avoid https://github.com/jruby/jruby/issues/6111
+            write_pipe.close
+            upload_thread.join
           end
-        rescue => e
-          thread_errors + [e]
+        rescue StandardError => e
+          errors << e
         end
+        return ordered_parts(completed_parts) if errors.empty?

-        if errors.empty?
-          Array.new(completed.size) { completed.pop }.sort_by { |part| part[:part_number] }
-        else
-          abort_upload(upload_id, options, errors)
-        end
+        abort_upload(upload_id, options, errors)
       end

       def abort_upload(upload_id, options, errors)
-        @client.abort_multipart_upload(
-          bucket: options[:bucket],
-          key: options[:key],
-          upload_id: upload_id
-        )
+        @client.abort_multipart_upload(bucket: options[:bucket], key: options[:key], upload_id: upload_id)
         msg = "multipart upload failed: #{errors.map(&:message).join('; ')}"
         raise MultipartUploadError.new(msg, errors)
-      rescue MultipartUploadError => error
-        raise error
-      rescue => error
-        msg = "failed to abort multipart upload: #{error.message}. "\
+      rescue MultipartUploadError => e
+        raise e
+      rescue StandardError => e
+        msg = "failed to abort multipart upload: #{e.message}. "\
           "Multipart upload failed: #{errors.map(&:message).join('; ')}"
-        raise MultipartUploadError.new(msg, errors + [error])
+        raise MultipartUploadError.new(msg, errors + [e])
       end

       def create_opts(options)
-        CREATE_OPTIONS.inject({}) do |hash, key|
+        CREATE_OPTIONS.each_with_object({}) do |key, hash|
           hash[key] = options[key] if options.key?(key)
-          hash
         end
       end

       def upload_part_opts(options)
-        UPLOAD_PART_OPTIONS.inject({}) do |hash, key|
+        UPLOAD_PART_OPTIONS.each_with_object({}) do |key, hash|
           hash[key] = options[key] if options.key?(key)
-          hash
         end
       end

       def complete_opts(options)
-        COMPLETE_UPLOAD_OPTIONS.inject({}) do |hash, key|
+        COMPLETE_UPLOAD_OPTIONS.each_with_object({}) do |key, hash|
           hash[key] = options[key] if options.key?(key)
-          hash
         end
       end

       def read_to_part_body(read_pipe)
         return if read_pipe.closed?
-        temp_io = @tempfile ? Tempfile.new(TEMPFILE_PREIX) : StringIO.new(String.new)
+
+        temp_io = @tempfile ? Tempfile.new('aws-sdk-s3-upload_stream') : StringIO.new(String.new)
         temp_io.binmode
         bytes_copied = IO.copy_stream(read_pipe, temp_io, @part_size)
         temp_io.rewind
-        if bytes_copied == 0
-          if Tempfile === temp_io
+        if bytes_copied.zero?
+          if temp_io.is_a?(Tempfile)
             temp_io.close
             temp_io.unlink
           end
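upload_parts keeps the IO.pipe producer/consumer arrangement, but consumption now happens on a single background thread that feeds read_to_part_body output to the executor, and the writer always closes the pipe and joins that thread in the ensure block. Stripped of the S3 calls, the pipe pattern looks like this (toy payload and chunk size):

    chunks = Queue.new

    IO.pipe do |read_pipe, write_pipe|
      consumer = Thread.new do
        # Stand-in for read_to_part_body(read_pipe): read fixed-size chunks until EOF.
        while (chunk = read_pipe.read(5))
          chunks << chunk
        end
      end

      begin
        write_pipe.write('abcdefghij') # stand-in for block.call(write_pipe)
      ensure
        write_pipe.close               # reader sees EOF; avoids hanging the consumer
        consumer.join
      end
    end

    chunks.size # => 2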
@@ -150,51 +128,62 @@ module Aws
         end
       end

-      def upload_in_threads(read_pipe, completed, options, thread_errors)
-        mutex = Mutex.new
+      def upload_with_executor(read_pipe, completed, errors, options)
+        completion_queue = Queue.new
+        queued_parts = 0
         part_number = 0
-        options.fetch(:thread_count, @thread_count).times.map do
-          thread = Thread.new do
-            begin
-              loop do
-                body, thread_part_number = mutex.synchronize do
-                  [read_to_part_body(read_pipe), part_number += 1]
-                end
-                break unless (body || thread_part_number == 1)
-                begin
-                  part = options.merge(
-                    body: body,
-                    part_number: thread_part_number,
-                  )
-                  resp = @client.upload_part(part)
-                  completed_part = {etag: resp.etag, part_number: part[:part_number]}
-
-                  # get the requested checksum from the response
-                  if part[:checksum_algorithm]
-                    k = "checksum_#{part[:checksum_algorithm].downcase}".to_sym
-                    completed_part[k] = resp[k]
-                  end
-                  completed.push(completed_part)
-                ensure
-                  if Tempfile === body
-                    body.close
-                    body.unlink
-                  elsif StringIO === body
-                    body.string.clear
-                  end
-                end
-              end
-              nil
-            rescue => error
-              # keep other threads from uploading other parts
-              mutex.synchronize do
-                thread_errors.push(error)
-                read_pipe.close_read unless read_pipe.closed?
-              end
-              error
+        mutex = Mutex.new
+        loop do
+          part_body, current_part_num = mutex.synchronize do
+            [read_to_part_body(read_pipe), part_number += 1]
+          end
+          break unless part_body || current_part_num == 1
+
+          queued_parts += 1
+          @executor.post(part_body, current_part_num, options) do |body, num, opts|
+            part = opts.merge(body: body, part_number: num)
+            resp = @client.upload_part(part)
+            completed_part = create_completed_part(resp, part)
+            completed.push(completed_part)
+          rescue StandardError => e
+            mutex.synchronize do
+              errors.push(e)
+              read_pipe.close_read unless read_pipe.closed?
            end
+          ensure
+            clear_body(body)
+            completion_queue << :done
          end
-          thread
+        end
+        queued_parts.times { completion_queue.pop }
+      end
+
+      def create_completed_part(resp, part)
+        completed_part = { etag: resp.etag, part_number: part[:part_number] }
+        return completed_part unless part[:checksum_algorithm]
+
+        # get the requested checksum from the response
+        k = "checksum_#{part[:checksum_algorithm].downcase}".to_sym
+        completed_part[k] = resp[k]
+        completed_part
+      end
+
+      def ordered_parts(parts)
+        sorted = []
+        until parts.empty?
+          part = parts.pop
+          index = sorted.bsearch_index { |p| p[:part_number] >= part[:part_number] } || sorted.size
+          sorted.insert(index, part)
+        end
+        sorted
+      end
+
+      def clear_body(body)
+        if body.is_a?(Tempfile)
+          body.close
+          body.unlink
+        elsif body.is_a?(StringIO)
+          body.string.clear
         end
       end
     end
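ordered_parts replaces the old drain-then-sort approach (Array.new(completed.size) { completed.pop }.sort_by { ... }) with an insertion sort driven by bsearch_index as parts are drained from the queue. A standalone check of that insertion logic with sample part numbers:

    parts = Queue.new
    [3, 1, 2].each { |n| parts << { etag: %("etag-#{n}"), part_number: n } }

    sorted = []
    until parts.empty?
      part = parts.pop
      index = sorted.bsearch_index { |p| p[:part_number] >= part[:part_number] } || sorted.size
      sorted.insert(index, part)
    end

    sorted.map { |p| p[:part_number] } # => [1, 2, 3]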
@@ -2,17 +2,16 @@

 module Aws
   module S3
+    # Raise when multipart upload fails to complete.
     class MultipartUploadError < StandardError

-      def initialize(message, errors)
+      def initialize(message, errors = [])
         @errors = errors
         super(message)
       end

-      # @return [Array<StandardError>] The list of errors encountered
-      #   when uploading or aborting the upload.
+      # @return [Array<StandardError>] The list of errors encountered when uploading or aborting the upload.
       attr_reader :errors
-
     end
   end
 end
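With errors now defaulting to an empty array, MultipartUploadError can be raised with just a message, and existing callers keep working: rescue it and inspect #errors for the per-part failures that were aggregated. A minimal usage sketch (bucket, key, and path are placeholders):

    require 'aws-sdk-s3'

    obj = Aws::S3::Resource.new.bucket('my-bucket').object('big.bin')

    begin
      obj.upload_file('/tmp/big.bin')
    rescue Aws::S3::MultipartUploadError => e
      warn e.message
      e.errors.each { |err| warn "  #{err.class}: #{err.message}" }
    end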