aws-sdk-s3 1.193.0 → 1.200.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,27 +7,11 @@ module Aws
7
7
  module S3
8
8
  # @api private
9
9
  class MultipartFileUploader
10
-
11
10
  MIN_PART_SIZE = 5 * 1024 * 1024 # 5MB
12
-
13
- FILE_TOO_SMALL = "unable to multipart upload files smaller than 5MB"
14
-
15
11
  MAX_PARTS = 10_000
16
-
17
- THREAD_COUNT = 10
18
-
19
- CREATE_OPTIONS = Set.new(
20
- Client.api.operation(:create_multipart_upload).input.shape.member_names
21
- )
22
-
23
- COMPLETE_OPTIONS = Set.new(
24
- Client.api.operation(:complete_multipart_upload).input.shape.member_names
25
- )
26
-
27
- UPLOAD_PART_OPTIONS = Set.new(
28
- Client.api.operation(:upload_part).input.shape.member_names
29
- )
30
-
12
+ CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
13
+ COMPLETE_OPTIONS = Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names)
14
+ UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names)
31
15
  CHECKSUM_KEYS = Set.new(
32
16
  Client.api.operation(:upload_part).input.shape.members.map do |n, s|
33
17
  n if s.location == 'header' && s.location_name.start_with?('x-amz-checksum-')
@@ -35,10 +19,9 @@ module Aws
35
19
  )
36
20
 
37
21
  # @option options [Client] :client
38
- # @option options [Integer] :thread_count (THREAD_COUNT)
39
22
  def initialize(options = {})
40
23
  @client = options[:client] || Client.new
41
- @thread_count = options[:thread_count] || THREAD_COUNT
24
+ @executor = options[:executor]
42
25
  end
43
26
 
44
27
  # @return [Client]
@@ -52,13 +35,12 @@ module Aws
52
35
  # It will be invoked with [bytes_read], [total_sizes]
53
36
  # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse
54
37
  def upload(source, options = {})
55
- if File.size(source) < MIN_PART_SIZE
56
- raise ArgumentError, FILE_TOO_SMALL
57
- else
58
- upload_id = initiate_upload(options)
59
- parts = upload_parts(upload_id, source, options)
60
- complete_upload(upload_id, parts, options)
61
- end
38
+ file_size = File.size(source)
39
+ raise ArgumentError, 'unable to multipart upload files smaller than 5MB' if file_size < MIN_PART_SIZE
40
+
41
+ upload_id = initiate_upload(options)
42
+ parts = upload_parts(upload_id, source, file_size, options)
43
+ complete_upload(upload_id, parts, file_size, options)
62
44
  end
63
45
 
64
46
  private
@@ -67,19 +49,22 @@ module Aws
67
49
  @client.create_multipart_upload(create_opts(options)).upload_id
68
50
  end
69
51
 
70
- def complete_upload(upload_id, parts, options)
52
+ def complete_upload(upload_id, parts, file_size, options)
71
53
  @client.complete_multipart_upload(
72
54
  **complete_opts(options).merge(
73
55
  upload_id: upload_id,
74
- multipart_upload: { parts: parts }
56
+ multipart_upload: { parts: parts },
57
+ mpu_object_size: file_size
75
58
  )
76
59
  )
60
+ rescue StandardError => e
61
+ abort_upload(upload_id, options, [e])
77
62
  end
78
63
 
79
- def upload_parts(upload_id, source, options)
80
- pending = PartList.new(compute_parts(upload_id, source, options))
64
+ def upload_parts(upload_id, source, file_size, options)
81
65
  completed = PartList.new
82
- errors = upload_in_threads(pending, completed, options)
66
+ pending = PartList.new(compute_parts(upload_id, source, file_size, options))
67
+ errors = upload_with_executor(pending, completed, options)
83
68
  if errors.empty?
84
69
  completed.to_a.sort_by { |part| part[:part_number] }
85
70
  else
@@ -88,35 +73,30 @@ module Aws
88
73
  end
89
74
 
90
75
  def abort_upload(upload_id, options, errors)
91
- @client.abort_multipart_upload(
92
- bucket: options[:bucket],
93
- key: options[:key],
94
- upload_id: upload_id
95
- )
76
+ @client.abort_multipart_upload(bucket: options[:bucket], key: options[:key], upload_id: upload_id)
96
77
  msg = "multipart upload failed: #{errors.map(&:message).join('; ')}"
97
78
  raise MultipartUploadError.new(msg, errors)
98
- rescue MultipartUploadError => error
99
- raise error
100
- rescue => error
101
- msg = "failed to abort multipart upload: #{error.message}. "\
79
+ rescue MultipartUploadError => e
80
+ raise e
81
+ rescue StandardError => e
82
+ msg = "failed to abort multipart upload: #{e.message}. "\
102
83
  "Multipart upload failed: #{errors.map(&:message).join('; ')}"
103
- raise MultipartUploadError.new(msg, errors + [error])
84
+ raise MultipartUploadError.new(msg, errors + [e])
104
85
  end
105
86
 
106
- def compute_parts(upload_id, source, options)
107
- size = File.size(source)
108
- default_part_size = compute_default_part_size(size)
87
+ def compute_parts(upload_id, source, file_size, options)
88
+ default_part_size = compute_default_part_size(file_size)
109
89
  offset = 0
110
90
  part_number = 1
111
91
  parts = []
112
- while offset < size
92
+ while offset < file_size
113
93
  parts << upload_part_opts(options).merge(
114
94
  upload_id: upload_id,
115
95
  part_number: part_number,
116
96
  body: FilePart.new(
117
97
  source: source,
118
98
  offset: offset,
119
- size: part_size(size, default_part_size, offset)
99
+ size: part_size(file_size, default_part_size, offset)
120
100
  )
121
101
  )
122
102
  part_number += 1
@@ -136,71 +116,56 @@ module Aws
136
116
  def create_opts(options)
137
117
  opts = { checksum_algorithm: Aws::Plugins::ChecksumAlgorithm::DEFAULT_CHECKSUM }
138
118
  opts[:checksum_type] = 'FULL_OBJECT' if has_checksum_key?(options.keys)
139
- CREATE_OPTIONS.inject(opts) do |hash, key|
140
- hash[key] = options[key] if options.key?(key)
141
- hash
142
- end
119
+ CREATE_OPTIONS.each_with_object(opts) { |k, h| h[k] = options[k] if options.key?(k) }
143
120
  end
144
121
 
145
122
  def complete_opts(options)
146
123
  opts = {}
147
124
  opts[:checksum_type] = 'FULL_OBJECT' if has_checksum_key?(options.keys)
148
- COMPLETE_OPTIONS.inject(opts) do |hash, key|
149
- hash[key] = options[key] if options.key?(key)
150
- hash
151
- end
125
+ COMPLETE_OPTIONS.each_with_object(opts) { |k, h| h[k] = options[k] if options.key?(k) }
152
126
  end
153
127
 
154
128
  def upload_part_opts(options)
155
- UPLOAD_PART_OPTIONS.inject({}) do |hash, key|
156
- if options.key?(key)
157
- # don't pass through checksum calculations
158
- hash[key] = options[key] unless checksum_key?(key)
129
+ UPLOAD_PART_OPTIONS.each_with_object({}) do |key, hash|
130
+ # don't pass through checksum calculations
131
+ hash[key] = options[key] if options.key?(key) && !checksum_key?(key)
132
+ end
133
+ end
134
+
135
+ def upload_with_executor(pending, completed, options)
136
+ upload_attempts = 0
137
+ completion_queue = Queue.new
138
+ abort_upload = false
139
+ errors = []
140
+ progress = MultipartProgress.new(pending, options[:progress_callback])
141
+
142
+ while (part = pending.shift)
143
+ break if abort_upload
144
+
145
+ upload_attempts += 1
146
+ @executor.post(part) do |p|
147
+ update_progress(progress, p)
148
+ resp = @client.upload_part(p)
149
+ p[:body].close
150
+ completed_part = { etag: resp.etag, part_number: p[:part_number] }
151
+ algorithm = resp.context.params[:checksum_algorithm].downcase
152
+ k = "checksum_#{algorithm}".to_sym
153
+ completed_part[k] = resp.send(k)
154
+ completed.push(completed_part)
155
+ rescue StandardError => e
156
+ abort_upload = true
157
+ errors << e
158
+ ensure
159
+ completion_queue << :done
159
160
  end
160
- hash
161
161
  end
162
- end
163
162
 
164
- def upload_in_threads(pending, completed, options)
165
- threads = []
166
- if (callback = options[:progress_callback])
167
- progress = MultipartProgress.new(pending, callback)
168
- end
169
- options.fetch(:thread_count, @thread_count).times do
170
- thread = Thread.new do
171
- begin
172
- while part = pending.shift
173
- if progress
174
- part[:on_chunk_sent] =
175
- proc do |_chunk, bytes, _total|
176
- progress.call(part[:part_number], bytes)
177
- end
178
- end
179
- resp = @client.upload_part(part)
180
- part[:body].close
181
- completed_part = {
182
- etag: resp.etag,
183
- part_number: part[:part_number]
184
- }
185
- algorithm = resp.context.params[:checksum_algorithm]
186
- k = "checksum_#{algorithm.downcase}".to_sym
187
- completed_part[k] = resp.send(k)
188
- completed.push(completed_part)
189
- end
190
- nil
191
- rescue => error
192
- # keep other threads from uploading other parts
193
- pending.clear!
194
- error
195
- end
196
- end
197
- threads << thread
198
- end
199
- threads.map(&:value).compact
163
+ upload_attempts.times { completion_queue.pop }
164
+ errors
200
165
  end
201
166
 
202
- def compute_default_part_size(source_size)
203
- [(source_size.to_f / MAX_PARTS).ceil, MIN_PART_SIZE].max.to_i
167
+ def compute_default_part_size(file_size)
168
+ [(file_size.to_f / MAX_PARTS).ceil, MIN_PART_SIZE].max.to_i
204
169
  end
205
170
 
206
171
  def part_size(total_size, part_size, offset)
@@ -211,9 +176,17 @@ module Aws
211
176
  end
212
177
  end
213
178
 
179
+ def update_progress(progress, part)
180
+ return unless progress.progress_callback
181
+
182
+ part[:on_chunk_sent] =
183
+ proc do |_chunk, bytes, _total|
184
+ progress.call(part[:part_number], bytes)
185
+ end
186
+ end
187
+
214
188
  # @api private
215
189
  class PartList
216
-
217
190
  def initialize(parts = [])
218
191
  @parts = parts
219
192
  @mutex = Mutex.new
@@ -242,7 +215,6 @@ module Aws
242
215
  def to_a
243
216
  @mutex.synchronize { @parts.dup }
244
217
  end
245
-
246
218
  end
247
219
 
248
220
  # @api private
@@ -253,6 +225,8 @@ module Aws
253
225
  @progress_callback = progress_callback
254
226
  end
255
227
 
228
+ attr_reader :progress_callback
229
+
256
230
  def call(part_number, bytes_read)
257
231
  # part numbers start at 1
258
232
  @bytes_sent[part_number - 1] = bytes_read
@@ -261,4 +235,4 @@ module Aws
261
235
  end
262
236
  end
263
237
  end
264
- end
238
+ end
@@ -9,33 +9,18 @@ module Aws
9
9
  module S3
10
10
  # @api private
11
11
  class MultipartStreamUploader
12
- # api private
13
- PART_SIZE = 5 * 1024 * 1024 # 5MB
14
12
 
15
- # api private
16
- THREAD_COUNT = 10
17
-
18
- # api private
19
- TEMPFILE_PREIX = 'aws-sdk-s3-upload_stream'.freeze
20
-
21
- # @api private
22
- CREATE_OPTIONS =
23
- Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
24
-
25
- # @api private
26
- UPLOAD_PART_OPTIONS =
27
- Set.new(Client.api.operation(:upload_part).input.shape.member_names)
28
-
29
- # @api private
30
- COMPLETE_UPLOAD_OPTIONS =
31
- Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names)
13
+ DEFAULT_PART_SIZE = 5 * 1024 * 1024 # 5MB
14
+ CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
15
+ UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names)
16
+ COMPLETE_UPLOAD_OPTIONS = Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names)
32
17
 
33
18
  # @option options [Client] :client
34
19
  def initialize(options = {})
35
20
  @client = options[:client] || Client.new
21
+ @executor = options[:executor]
36
22
  @tempfile = options[:tempfile]
37
- @part_size = options[:part_size] || PART_SIZE
38
- @thread_count = options[:thread_count] || THREAD_COUNT
23
+ @part_size = options[:part_size] || DEFAULT_PART_SIZE
39
24
  end
40
25
 
41
26
  # @return [Client]
@@ -43,7 +28,6 @@ module Aws
43
28
 
44
29
  # @option options [required,String] :bucket
45
30
  # @option options [required,String] :key
46
- # @option options [Integer] :thread_count (THREAD_COUNT)
47
31
  # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse
48
32
  def upload(options = {}, &block)
49
33
  Aws::Plugins::UserAgent.metric('S3_TRANSFER') do
@@ -61,86 +45,80 @@ module Aws
61
45
 
62
46
  def complete_upload(upload_id, parts, options)
63
47
  @client.complete_multipart_upload(
64
- **complete_opts(options).merge(
65
- upload_id: upload_id,
66
- multipart_upload: { parts: parts }
67
- )
48
+ **complete_opts(options).merge(upload_id: upload_id, multipart_upload: { parts: parts })
68
49
  )
50
+ rescue StandardError => e
51
+ abort_upload(upload_id, options, [e])
69
52
  end
70
53
 
71
54
  def upload_parts(upload_id, options, &block)
72
- completed = Queue.new
73
- thread_errors = []
74
- errors = begin
55
+ completed_parts = Queue.new
56
+ errors = []
57
+
58
+ begin
75
59
  IO.pipe do |read_pipe, write_pipe|
76
- threads = upload_in_threads(
77
- read_pipe, completed,
78
- upload_part_opts(options).merge(upload_id: upload_id),
79
- thread_errors)
80
- begin
81
- block.call(write_pipe)
82
- ensure
83
- # Ensure the pipe is closed to avoid https://github.com/jruby/jruby/issues/6111
84
- write_pipe.close
60
+ upload_thread = Thread.new do
61
+ upload_with_executor(
62
+ read_pipe,
63
+ completed_parts,
64
+ errors,
65
+ upload_part_opts(options).merge(upload_id: upload_id)
66
+ )
85
67
  end
86
- threads.map(&:value).compact
68
+
69
+ block.call(write_pipe)
70
+ ensure
71
+ # Ensure the pipe is closed to avoid https://github.com/jruby/jruby/issues/6111
72
+ write_pipe.close
73
+ upload_thread.join
87
74
  end
88
- rescue => e
89
- thread_errors + [e]
75
+ rescue StandardError => e
76
+ errors << e
90
77
  end
78
+ return ordered_parts(completed_parts) if errors.empty?
91
79
 
92
- if errors.empty?
93
- Array.new(completed.size) { completed.pop }.sort_by { |part| part[:part_number] }
94
- else
95
- abort_upload(upload_id, options, errors)
96
- end
80
+ abort_upload(upload_id, options, errors)
97
81
  end
98
82
 
99
83
  def abort_upload(upload_id, options, errors)
100
- @client.abort_multipart_upload(
101
- bucket: options[:bucket],
102
- key: options[:key],
103
- upload_id: upload_id
104
- )
84
+ @client.abort_multipart_upload(bucket: options[:bucket], key: options[:key], upload_id: upload_id)
105
85
  msg = "multipart upload failed: #{errors.map(&:message).join('; ')}"
106
86
  raise MultipartUploadError.new(msg, errors)
107
- rescue MultipartUploadError => error
108
- raise error
109
- rescue => error
110
- msg = "failed to abort multipart upload: #{error.message}. "\
87
+ rescue MultipartUploadError => e
88
+ raise e
89
+ rescue StandardError => e
90
+ msg = "failed to abort multipart upload: #{e.message}. "\
111
91
  "Multipart upload failed: #{errors.map(&:message).join('; ')}"
112
- raise MultipartUploadError.new(msg, errors + [error])
92
+ raise MultipartUploadError.new(msg, errors + [e])
113
93
  end
114
94
 
115
95
  def create_opts(options)
116
- CREATE_OPTIONS.inject({}) do |hash, key|
96
+ CREATE_OPTIONS.each_with_object({}) do |key, hash|
117
97
  hash[key] = options[key] if options.key?(key)
118
- hash
119
98
  end
120
99
  end
121
100
 
122
101
  def upload_part_opts(options)
123
- UPLOAD_PART_OPTIONS.inject({}) do |hash, key|
102
+ UPLOAD_PART_OPTIONS.each_with_object({}) do |key, hash|
124
103
  hash[key] = options[key] if options.key?(key)
125
- hash
126
104
  end
127
105
  end
128
106
 
129
107
  def complete_opts(options)
130
- COMPLETE_UPLOAD_OPTIONS.inject({}) do |hash, key|
108
+ COMPLETE_UPLOAD_OPTIONS.each_with_object({}) do |key, hash|
131
109
  hash[key] = options[key] if options.key?(key)
132
- hash
133
110
  end
134
111
  end
135
112
 
136
113
  def read_to_part_body(read_pipe)
137
114
  return if read_pipe.closed?
138
- temp_io = @tempfile ? Tempfile.new(TEMPFILE_PREIX) : StringIO.new(String.new)
115
+
116
+ temp_io = @tempfile ? Tempfile.new('aws-sdk-s3-upload_stream') : StringIO.new(String.new)
139
117
  temp_io.binmode
140
118
  bytes_copied = IO.copy_stream(read_pipe, temp_io, @part_size)
141
119
  temp_io.rewind
142
- if bytes_copied == 0
143
- if Tempfile === temp_io
120
+ if bytes_copied.zero?
121
+ if temp_io.is_a?(Tempfile)
144
122
  temp_io.close
145
123
  temp_io.unlink
146
124
  end
@@ -150,51 +128,62 @@ module Aws
150
128
  end
151
129
  end
152
130
 
153
- def upload_in_threads(read_pipe, completed, options, thread_errors)
154
- mutex = Mutex.new
131
+ def upload_with_executor(read_pipe, completed, errors, options)
132
+ completion_queue = Queue.new
133
+ queued_parts = 0
155
134
  part_number = 0
156
- options.fetch(:thread_count, @thread_count).times.map do
157
- thread = Thread.new do
158
- begin
159
- loop do
160
- body, thread_part_number = mutex.synchronize do
161
- [read_to_part_body(read_pipe), part_number += 1]
162
- end
163
- break unless (body || thread_part_number == 1)
164
- begin
165
- part = options.merge(
166
- body: body,
167
- part_number: thread_part_number,
168
- )
169
- resp = @client.upload_part(part)
170
- completed_part = {etag: resp.etag, part_number: part[:part_number]}
171
-
172
- # get the requested checksum from the response
173
- if part[:checksum_algorithm]
174
- k = "checksum_#{part[:checksum_algorithm].downcase}".to_sym
175
- completed_part[k] = resp[k]
176
- end
177
- completed.push(completed_part)
178
- ensure
179
- if Tempfile === body
180
- body.close
181
- body.unlink
182
- elsif StringIO === body
183
- body.string.clear
184
- end
185
- end
186
- end
187
- nil
188
- rescue => error
189
- # keep other threads from uploading other parts
190
- mutex.synchronize do
191
- thread_errors.push(error)
192
- read_pipe.close_read unless read_pipe.closed?
193
- end
194
- error
135
+ mutex = Mutex.new
136
+ loop do
137
+ part_body, current_part_num = mutex.synchronize do
138
+ [read_to_part_body(read_pipe), part_number += 1]
139
+ end
140
+ break unless part_body || current_part_num == 1
141
+
142
+ queued_parts += 1
143
+ @executor.post(part_body, current_part_num, options) do |body, num, opts|
144
+ part = opts.merge(body: body, part_number: num)
145
+ resp = @client.upload_part(part)
146
+ completed_part = create_completed_part(resp, part)
147
+ completed.push(completed_part)
148
+ rescue StandardError => e
149
+ mutex.synchronize do
150
+ errors.push(e)
151
+ read_pipe.close_read unless read_pipe.closed?
195
152
  end
153
+ ensure
154
+ clear_body(body)
155
+ completion_queue << :done
196
156
  end
197
- thread
157
+ end
158
+ queued_parts.times { completion_queue.pop }
159
+ end
160
+
161
+ def create_completed_part(resp, part)
162
+ completed_part = { etag: resp.etag, part_number: part[:part_number] }
163
+ return completed_part unless part[:checksum_algorithm]
164
+
165
+ # get the requested checksum from the response
166
+ k = "checksum_#{part[:checksum_algorithm].downcase}".to_sym
167
+ completed_part[k] = resp[k]
168
+ completed_part
169
+ end
170
+
171
+ def ordered_parts(parts)
172
+ sorted = []
173
+ until parts.empty?
174
+ part = parts.pop
175
+ index = sorted.bsearch_index { |p| p[:part_number] >= part[:part_number] } || sorted.size
176
+ sorted.insert(index, part)
177
+ end
178
+ sorted
179
+ end
180
+
181
+ def clear_body(body)
182
+ if body.is_a?(Tempfile)
183
+ body.close
184
+ body.unlink
185
+ elsif body.is_a?(StringIO)
186
+ body.string.clear
198
187
  end
199
188
  end
200
189
  end
@@ -2,17 +2,16 @@
2
2
 
3
3
  module Aws
4
4
  module S3
5
+ # Raise when multipart upload fails to complete.
5
6
  class MultipartUploadError < StandardError
6
7
 
7
- def initialize(message, errors)
8
+ def initialize(message, errors = [])
8
9
  @errors = errors
9
10
  super(message)
10
11
  end
11
12
 
12
- # @return [Array<StandardError>] The list of errors encountered
13
- # when uploading or aborting the upload.
13
+ # @return [Array<StandardError>] The list of errors encountered when uploading or aborting the upload.
14
14
  attr_reader :errors
15
-
16
15
  end
17
16
  end
18
17
  end