aws-sdk-s3 1.192.0 → 1.201.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,209 +1,253 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'pathname'
4
- require 'thread'
4
+ require 'securerandom'
5
5
  require 'set'
6
- require 'tmpdir'
7
6
 
8
7
  module Aws
9
8
  module S3
10
9
  # @api private
11
10
  class FileDownloader
12
-
13
11
  MIN_CHUNK_SIZE = 5 * 1024 * 1024
14
12
  MAX_PARTS = 10_000
15
- THREAD_COUNT = 10
13
+ HEAD_OPTIONS = Set.new(Client.api.operation(:head_object).input.shape.member_names)
14
+ GET_OPTIONS = Set.new(Client.api.operation(:get_object).input.shape.member_names)
16
15
 
17
16
  def initialize(options = {})
18
17
  @client = options[:client] || Client.new
18
+ @executor = options[:executor]
19
19
  end
20
20
 
21
21
  # @return [Client]
22
22
  attr_reader :client
23
23
 
24
24
  def download(destination, options = {})
25
- @path = destination
26
- @mode = options[:mode] || 'auto'
27
- @thread_count = options[:thread_count] || THREAD_COUNT
28
- @chunk_size = options[:chunk_size]
29
- @params = {
30
- bucket: options[:bucket],
31
- key: options[:key]
32
- }
33
- @params[:version_id] = options[:version_id] if options[:version_id]
34
- @on_checksum_validated = options[:on_checksum_validated]
35
- @progress_callback = options[:progress_callback]
36
-
37
- validate!
25
+ validate_destination!(destination)
26
+ opts = build_download_opts(destination, options)
27
+ validate_opts!(opts)
38
28
 
39
29
  Aws::Plugins::UserAgent.metric('S3_TRANSFER') do
40
- case @mode
41
- when 'auto' then multipart_download
42
- when 'single_request' then single_request
43
- when 'get_range'
44
- if @chunk_size
45
- resp = @client.head_object(@params)
46
- multithreaded_get_by_ranges(resp.content_length, resp.etag)
47
- else
48
- msg = 'In :get_range mode, :chunk_size must be provided'
49
- raise ArgumentError, msg
50
- end
51
- else
52
- msg = "Invalid mode #{@mode} provided, "\
53
- 'mode should be :single_request, :get_range or :auto'
54
- raise ArgumentError, msg
30
+ case opts[:mode]
31
+ when 'auto' then multipart_download(opts)
32
+ when 'single_request' then single_request(opts)
33
+ when 'get_range' then range_request(opts)
55
34
  end
56
35
  end
36
+ File.rename(opts[:temp_path], destination) if opts[:temp_path]
37
+ ensure
38
+ cleanup_temp_file(opts)
57
39
  end
58
40
 
59
41
  private
60
42
 
61
- def validate!
62
- if @on_checksum_validated && !@on_checksum_validated.respond_to?(:call)
63
- raise ArgumentError, 'on_checksum_validated must be callable'
43
+ def build_download_opts(destination, opts)
44
+ {
45
+ destination: destination,
46
+ mode: opts.delete(:mode) || 'auto',
47
+ chunk_size: opts.delete(:chunk_size),
48
+ on_checksum_validated: opts.delete(:on_checksum_validated),
49
+ progress_callback: opts.delete(:progress_callback),
50
+ params: opts,
51
+ temp_path: nil
52
+ }
53
+ end
54
+
55
+ def cleanup_temp_file(opts)
56
+ return unless opts
57
+
58
+ temp_file = opts[:temp_path]
59
+ File.delete(temp_file) if temp_file && File.exist?(temp_file)
60
+ end
61
+
62
+ def download_with_executor(part_list, total_size, opts)
63
+ download_attempts = 0
64
+ completion_queue = Queue.new
65
+ abort_download = false
66
+ error = nil
67
+ progress = MultipartProgress.new(part_list, total_size, opts[:progress_callback])
68
+
69
+ while (part = part_list.shift)
70
+ break if abort_download
71
+
72
+ download_attempts += 1
73
+ @executor.post(part) do |p|
74
+ update_progress(progress, p)
75
+ resp = @client.get_object(p.params)
76
+ range = extract_range(resp.content_range)
77
+ validate_range(range, p.params[:range]) if p.params[:range]
78
+ write(resp.body, range, opts)
79
+
80
+ execute_checksum_callback(resp, opts)
81
+ rescue StandardError => e
82
+ abort_download = true
83
+ error = e
84
+ ensure
85
+ completion_queue << :done
86
+ end
64
87
  end
88
+
89
+ download_attempts.times { completion_queue.pop }
90
+ raise error unless error.nil?
91
+ end
92
+
93
+ def get_opts(opts)
94
+ GET_OPTIONS.each_with_object({}) { |k, h| h[k] = opts[k] if opts.key?(k) }
65
95
  end
66
96
 
67
- def multipart_download
68
- resp = @client.head_object(@params.merge(part_number: 1))
97
+ def head_opts(opts)
98
+ HEAD_OPTIONS.each_with_object({}) { |k, h| h[k] = opts[k] if opts.key?(k) }
99
+ end
100
+
101
+ def compute_chunk(chunk_size, file_size)
102
+ raise ArgumentError, ":chunk_size shouldn't exceed total file size." if chunk_size && chunk_size > file_size
103
+
104
+ chunk_size || [(file_size.to_f / MAX_PARTS).ceil, MIN_CHUNK_SIZE].max.to_i
105
+ end
106
+
107
+ def compute_mode(file_size, total_parts, etag, opts)
108
+ chunk_size = compute_chunk(opts[:chunk_size], file_size)
109
+ part_size = (file_size.to_f / total_parts).ceil
110
+
111
+ resolve_temp_path(opts)
112
+ if chunk_size < part_size
113
+ multithreaded_get_by_ranges(file_size, etag, opts)
114
+ else
115
+ multithreaded_get_by_parts(total_parts, file_size, etag, opts)
116
+ end
117
+ end
118
+
119
+ def extract_range(value)
120
+ value.match(%r{bytes (?<range>\d+-\d+)/\d+})[:range]
121
+ end
122
+
123
+ def multipart_download(opts)
124
+ resp = @client.head_object(head_opts(opts[:params].merge(part_number: 1)))
69
125
  count = resp.parts_count
126
+
70
127
  if count.nil? || count <= 1
71
128
  if resp.content_length <= MIN_CHUNK_SIZE
72
- single_request
129
+ single_request(opts)
73
130
  else
74
- multithreaded_get_by_ranges(resp.content_length, resp.etag)
131
+ resolve_temp_path(opts)
132
+ multithreaded_get_by_ranges(resp.content_length, resp.etag, opts)
75
133
  end
76
134
  else
77
- # partNumber is an option
78
- resp = @client.head_object(@params)
135
+ # covers cases when given object is not uploaded via UploadPart API
136
+ resp = @client.head_object(head_opts(opts[:params])) # partNumber is an option
79
137
  if resp.content_length <= MIN_CHUNK_SIZE
80
- single_request
138
+ single_request(opts)
81
139
  else
82
- compute_mode(resp.content_length, count, resp.etag)
140
+ compute_mode(resp.content_length, count, resp.etag, opts)
83
141
  end
84
142
  end
85
143
  end
86
144
 
87
- def compute_mode(file_size, count, etag)
88
- chunk_size = compute_chunk(file_size)
89
- part_size = (file_size.to_f / count.to_f).ceil
90
- if chunk_size < part_size
91
- multithreaded_get_by_ranges(file_size, etag)
92
- else
93
- multithreaded_get_by_parts(count, file_size, etag)
145
+ def multithreaded_get_by_parts(total_parts, file_size, etag, opts)
146
+ parts = (1..total_parts).map do |part|
147
+ params = get_opts(opts[:params].merge(part_number: part, if_match: etag))
148
+ Part.new(part_number: part, params: params)
94
149
  end
150
+ download_with_executor(PartList.new(parts), file_size, opts)
95
151
  end
96
152
 
97
- def construct_chunks(file_size)
153
+ def multithreaded_get_by_ranges(file_size, etag, opts)
98
154
  offset = 0
99
- default_chunk_size = compute_chunk(file_size)
155
+ default_chunk_size = compute_chunk(opts[:chunk_size], file_size)
100
156
  chunks = []
157
+ part_number = 1 # parts start at 1
101
158
  while offset < file_size
102
159
  progress = offset + default_chunk_size
103
160
  progress = file_size if progress > file_size
104
- chunks << "bytes=#{offset}-#{progress - 1}"
161
+ params = get_opts(opts[:params].merge(range: "bytes=#{offset}-#{progress - 1}", if_match: etag))
162
+ chunks << Part.new(part_number: part_number, size: (progress - offset), params: params)
163
+ part_number += 1
105
164
  offset = progress
106
165
  end
107
- chunks
166
+ download_with_executor(PartList.new(chunks), file_size, opts)
108
167
  end
109
168
 
110
- def compute_chunk(file_size)
111
- if @chunk_size && @chunk_size > file_size
112
- raise ArgumentError, ":chunk_size shouldn't exceed total file size."
113
- else
114
- @chunk_size || [
115
- (file_size.to_f / MAX_PARTS).ceil, MIN_CHUNK_SIZE
116
- ].max.to_i
117
- end
169
+ def range_request(opts)
170
+ resp = @client.head_object(head_opts(opts[:params]))
171
+ resolve_temp_path(opts)
172
+ multithreaded_get_by_ranges(resp.content_length, resp.etag, opts)
173
+ end
174
+
175
+ def resolve_temp_path(opts)
176
+ return if [File, Tempfile].include?(opts[:destination].class)
177
+
178
+ opts[:temp_path] ||= "#{opts[:destination]}.s3tmp.#{SecureRandom.alphanumeric(8)}"
118
179
  end
119
180
 
120
- def batches(chunks, mode)
121
- chunks = (1..chunks) if mode.eql? 'part_number'
122
- chunks.each_slice(@thread_count).to_a
181
+ def single_request(opts)
182
+ params = get_opts(opts[:params]).merge(response_target: opts[:destination])
183
+ params[:on_chunk_received] = single_part_progress(opts) if opts[:progress_callback]
184
+ resp = @client.get_object(params)
185
+ return resp unless opts[:on_checksum_validated]
186
+
187
+ opts[:on_checksum_validated].call(resp.checksum_validated, resp) if resp.checksum_validated
188
+ resp
123
189
  end
124
190
 
125
- def multithreaded_get_by_ranges(file_size, etag)
126
- offset = 0
127
- default_chunk_size = compute_chunk(file_size)
128
- chunks = []
129
- part_number = 1 # parts start at 1
130
- while offset < file_size
131
- progress = offset + default_chunk_size
132
- progress = file_size if progress > file_size
133
- range = "bytes=#{offset}-#{progress - 1}"
134
- chunks << Part.new(
135
- part_number: part_number,
136
- size: (progress-offset),
137
- params: @params.merge(range: range, if_match: etag)
138
- )
139
- part_number += 1
140
- offset = progress
141
- end
142
- download_in_threads(PartList.new(chunks), file_size)
143
- end
144
-
145
- def multithreaded_get_by_parts(n_parts, total_size, etag)
146
- parts = (1..n_parts).map do |part|
147
- Part.new(part_number: part, params: @params.merge(part_number: part, if_match: etag))
148
- end
149
- download_in_threads(PartList.new(parts), total_size)
150
- end
151
-
152
- def download_in_threads(pending, total_size)
153
- threads = []
154
- progress = MultipartProgress.new(pending, total_size, @progress_callback) if @progress_callback
155
- @thread_count.times do
156
- thread = Thread.new do
157
- begin
158
- while part = pending.shift
159
- if progress
160
- part.params[:on_chunk_received] =
161
- proc do |_chunk, bytes, total|
162
- progress.call(part.part_number, bytes, total)
163
- end
164
- end
165
- resp = @client.get_object(part.params)
166
- write(resp)
167
- if @on_checksum_validated && resp.checksum_validated
168
- @on_checksum_validated.call(resp.checksum_validated, resp)
169
- end
170
- end
171
- nil
172
- rescue => error
173
- # keep other threads from downloading other parts
174
- pending.clear!
175
- raise error
176
- end
177
- end
178
- threads << thread
191
+ def single_part_progress(opts)
192
+ proc do |_chunk, bytes_read, total_size|
193
+ opts[:progress_callback].call([bytes_read], [total_size], total_size)
179
194
  end
180
- threads.map(&:value).compact
181
195
  end
182
196
 
183
- def write(resp)
184
- range, _ = resp.content_range.split(' ').last.split('/')
185
- head, _ = range.split('-').map {|s| s.to_i}
186
- File.write(@path, resp.body.read, head)
197
+ def update_progress(progress, part)
198
+ return unless progress.progress_callback
199
+
200
+ part.params[:on_chunk_received] =
201
+ proc do |_chunk, bytes, total|
202
+ progress.call(part.part_number, bytes, total)
203
+ end
187
204
  end
188
205
 
189
- def single_request
190
- params = @params.merge(response_target: @path)
191
- params[:on_chunk_received] = single_part_progress if @progress_callback
192
- resp = @client.get_object(params)
206
+ def execute_checksum_callback(resp, opts)
207
+ return unless opts[:on_checksum_validated] && resp.checksum_validated
193
208
 
194
- return resp unless @on_checksum_validated
209
+ opts[:on_checksum_validated].call(resp.checksum_validated, resp)
210
+ end
195
211
 
196
- @on_checksum_validated.call(resp.checksum_validated, resp) if resp.checksum_validated
212
+ def validate_destination!(destination)
213
+ valid_types = [String, Pathname, File, Tempfile]
214
+ return if valid_types.include?(destination.class)
197
215
 
198
- resp
216
+ raise ArgumentError, "Invalid destination, expected #{valid_types.join(', ')} but got: #{destination.class}"
199
217
  end
200
218
 
201
- def single_part_progress
202
- proc do |_chunk, bytes_read, total_size|
203
- @progress_callback.call([bytes_read], [total_size], total_size)
219
+ def validate_opts!(opts)
220
+ if opts[:on_checksum_validated] && !opts[:on_checksum_validated].respond_to?(:call)
221
+ raise ArgumentError, ':on_checksum_validated must be callable'
204
222
  end
223
+
224
+ valid_modes = %w[auto get_range single_request]
225
+ unless valid_modes.include?(opts[:mode])
226
+ msg = "Invalid mode #{opts[:mode]} provided, :mode should be single_request, get_range or auto"
227
+ raise ArgumentError, msg
228
+ end
229
+
230
+ if opts[:mode] == 'get_range' && opts[:chunk_size].nil?
231
+ raise ArgumentError, 'In get_range mode, :chunk_size must be provided'
232
+ end
233
+
234
+ if opts[:chunk_size] && opts[:chunk_size] <= 0
235
+ raise ArgumentError, ':chunk_size must be positive'
236
+ end
237
+ end
238
+
239
+ def validate_range(actual, expected)
240
+ return if actual == expected.match(/bytes=(?<range>\d+-\d+)/)[:range]
241
+
242
+ raise MultipartDownloadError, "multipart download failed: expected range of #{expected} but got #{actual}"
205
243
  end
206
244
 
245
+ def write(body, range, opts)
246
+ path = opts[:temp_path] || opts[:destination]
247
+ File.write(path, body.read, range.split('-').first.to_i)
248
+ end
249
+
250
+ # @api private
207
251
  class Part < Struct.new(:part_number, :size, :params)
208
252
  include Aws::Structure
209
253
  end
@@ -242,6 +286,8 @@ module Aws
242
286
  @progress_callback = progress_callback
243
287
  end
244
288
 
289
+ attr_reader :progress_callback
290
+
245
291
  def call(part_number, bytes_received, total)
246
292
  # part numbers start at 1
247
293
  @bytes_received[part_number - 1] = bytes_received
@@ -7,23 +7,21 @@ module Aws
7
7
  # @api private
8
8
  class FileUploader
9
9
 
10
- ONE_HUNDRED_MEGABYTES = 100 * 1024 * 1024
10
+ DEFAULT_MULTIPART_THRESHOLD = 100 * 1024 * 1024
11
11
 
12
12
  # @param [Hash] options
13
13
  # @option options [Client] :client
14
14
  # @option options [Integer] :multipart_threshold (104857600)
15
15
  def initialize(options = {})
16
- @options = options
17
16
  @client = options[:client] || Client.new
18
- @multipart_threshold = options[:multipart_threshold] ||
19
- ONE_HUNDRED_MEGABYTES
17
+ @executor = options[:executor]
18
+ @multipart_threshold = options[:multipart_threshold] || DEFAULT_MULTIPART_THRESHOLD
20
19
  end
21
20
 
22
21
  # @return [Client]
23
22
  attr_reader :client
24
23
 
25
- # @return [Integer] Files larger than or equal to this in bytes are uploaded
26
- # using a {MultipartFileUploader}.
24
+ # @return [Integer] Files larger than or equal to this in bytes are uploaded using a {MultipartFileUploader}.
27
25
  attr_reader :multipart_threshold
28
26
 
29
27
  # @param [String, Pathname, File, Tempfile] source The file to upload.
@@ -38,11 +36,9 @@ module Aws
38
36
  # @return [void]
39
37
  def upload(source, options = {})
40
38
  Aws::Plugins::UserAgent.metric('S3_TRANSFER') do
41
- if File.size(source) >= multipart_threshold
42
- MultipartFileUploader.new(@options).upload(source, options)
39
+ if File.size(source) >= @multipart_threshold
40
+ MultipartFileUploader.new(client: @client, executor: @executor).upload(source, options)
43
41
  else
44
- # remove multipart parameters not supported by put_object
45
- options.delete(:thread_count)
46
42
  put_object(source, options)
47
43
  end
48
44
  end
@@ -50,9 +46,9 @@ module Aws
50
46
 
51
47
  private
52
48
 
53
- def open_file(source)
54
- if String === source || Pathname === source
55
- File.open(source, 'rb') { |file| yield(file) }
49
+ def open_file(source, &block)
50
+ if source.is_a?(String) || source.is_a?(Pathname)
51
+ File.open(source, 'rb', &block)
56
52
  else
57
53
  yield(source)
58
54
  end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aws
4
+ module S3
5
+ # Raised when multipart download fails to complete.
6
+ class MultipartDownloadError < StandardError; end
7
+ end
8
+ end