aws-sdk-s3 1.192.0 → 1.201.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +69 -0
- data/VERSION +1 -1
- data/lib/aws-sdk-s3/client.rb +585 -46
- data/lib/aws-sdk-s3/client_api.rb +182 -0
- data/lib/aws-sdk-s3/customizations/object.rb +77 -86
- data/lib/aws-sdk-s3/customizations.rb +3 -1
- data/lib/aws-sdk-s3/default_executor.rb +103 -0
- data/lib/aws-sdk-s3/endpoints.rb +70 -0
- data/lib/aws-sdk-s3/file_downloader.rb +180 -134
- data/lib/aws-sdk-s3/file_uploader.rb +9 -13
- data/lib/aws-sdk-s3/multipart_download_error.rb +8 -0
- data/lib/aws-sdk-s3/multipart_file_uploader.rb +92 -107
- data/lib/aws-sdk-s3/multipart_stream_uploader.rb +96 -107
- data/lib/aws-sdk-s3/multipart_upload_error.rb +3 -4
- data/lib/aws-sdk-s3/transfer_manager.rb +304 -0
- data/lib/aws-sdk-s3/types.rb +702 -25
- data/lib/aws-sdk-s3.rb +1 -1
- data/sig/client.rbs +78 -0
- data/sig/resource.rbs +1 -0
- data/sig/types.rbs +122 -0
- metadata +6 -3
|
@@ -1,209 +1,253 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'pathname'
|
|
4
|
-
require '
|
|
4
|
+
require 'securerandom'
|
|
5
5
|
require 'set'
|
|
6
|
-
require 'tmpdir'
|
|
7
6
|
|
|
8
7
|
module Aws
|
|
9
8
|
module S3
|
|
10
9
|
# @api private
|
|
11
10
|
class FileDownloader
|
|
12
|
-
|
|
13
11
|
MIN_CHUNK_SIZE = 5 * 1024 * 1024
|
|
14
12
|
MAX_PARTS = 10_000
|
|
15
|
-
|
|
13
|
+
HEAD_OPTIONS = Set.new(Client.api.operation(:head_object).input.shape.member_names)
|
|
14
|
+
GET_OPTIONS = Set.new(Client.api.operation(:get_object).input.shape.member_names)
|
|
16
15
|
|
|
17
16
|
def initialize(options = {})
|
|
18
17
|
@client = options[:client] || Client.new
|
|
18
|
+
@executor = options[:executor]
|
|
19
19
|
end
|
|
20
20
|
|
|
21
21
|
# @return [Client]
|
|
22
22
|
attr_reader :client
|
|
23
23
|
|
|
24
24
|
def download(destination, options = {})
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@chunk_size = options[:chunk_size]
|
|
29
|
-
@params = {
|
|
30
|
-
bucket: options[:bucket],
|
|
31
|
-
key: options[:key]
|
|
32
|
-
}
|
|
33
|
-
@params[:version_id] = options[:version_id] if options[:version_id]
|
|
34
|
-
@on_checksum_validated = options[:on_checksum_validated]
|
|
35
|
-
@progress_callback = options[:progress_callback]
|
|
36
|
-
|
|
37
|
-
validate!
|
|
25
|
+
validate_destination!(destination)
|
|
26
|
+
opts = build_download_opts(destination, options)
|
|
27
|
+
validate_opts!(opts)
|
|
38
28
|
|
|
39
29
|
Aws::Plugins::UserAgent.metric('S3_TRANSFER') do
|
|
40
|
-
case
|
|
41
|
-
when 'auto' then multipart_download
|
|
42
|
-
when 'single_request' then single_request
|
|
43
|
-
when 'get_range'
|
|
44
|
-
if @chunk_size
|
|
45
|
-
resp = @client.head_object(@params)
|
|
46
|
-
multithreaded_get_by_ranges(resp.content_length, resp.etag)
|
|
47
|
-
else
|
|
48
|
-
msg = 'In :get_range mode, :chunk_size must be provided'
|
|
49
|
-
raise ArgumentError, msg
|
|
50
|
-
end
|
|
51
|
-
else
|
|
52
|
-
msg = "Invalid mode #{@mode} provided, "\
|
|
53
|
-
'mode should be :single_request, :get_range or :auto'
|
|
54
|
-
raise ArgumentError, msg
|
|
30
|
+
case opts[:mode]
|
|
31
|
+
when 'auto' then multipart_download(opts)
|
|
32
|
+
when 'single_request' then single_request(opts)
|
|
33
|
+
when 'get_range' then range_request(opts)
|
|
55
34
|
end
|
|
56
35
|
end
|
|
36
|
+
File.rename(opts[:temp_path], destination) if opts[:temp_path]
|
|
37
|
+
ensure
|
|
38
|
+
cleanup_temp_file(opts)
|
|
57
39
|
end
|
|
58
40
|
|
|
59
41
|
private
|
|
60
42
|
|
|
61
|
-
def
|
|
62
|
-
|
|
63
|
-
|
|
43
|
+
def build_download_opts(destination, opts)
|
|
44
|
+
{
|
|
45
|
+
destination: destination,
|
|
46
|
+
mode: opts.delete(:mode) || 'auto',
|
|
47
|
+
chunk_size: opts.delete(:chunk_size),
|
|
48
|
+
on_checksum_validated: opts.delete(:on_checksum_validated),
|
|
49
|
+
progress_callback: opts.delete(:progress_callback),
|
|
50
|
+
params: opts,
|
|
51
|
+
temp_path: nil
|
|
52
|
+
}
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def cleanup_temp_file(opts)
|
|
56
|
+
return unless opts
|
|
57
|
+
|
|
58
|
+
temp_file = opts[:temp_path]
|
|
59
|
+
File.delete(temp_file) if temp_file && File.exist?(temp_file)
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def download_with_executor(part_list, total_size, opts)
|
|
63
|
+
download_attempts = 0
|
|
64
|
+
completion_queue = Queue.new
|
|
65
|
+
abort_download = false
|
|
66
|
+
error = nil
|
|
67
|
+
progress = MultipartProgress.new(part_list, total_size, opts[:progress_callback])
|
|
68
|
+
|
|
69
|
+
while (part = part_list.shift)
|
|
70
|
+
break if abort_download
|
|
71
|
+
|
|
72
|
+
download_attempts += 1
|
|
73
|
+
@executor.post(part) do |p|
|
|
74
|
+
update_progress(progress, p)
|
|
75
|
+
resp = @client.get_object(p.params)
|
|
76
|
+
range = extract_range(resp.content_range)
|
|
77
|
+
validate_range(range, p.params[:range]) if p.params[:range]
|
|
78
|
+
write(resp.body, range, opts)
|
|
79
|
+
|
|
80
|
+
execute_checksum_callback(resp, opts)
|
|
81
|
+
rescue StandardError => e
|
|
82
|
+
abort_download = true
|
|
83
|
+
error = e
|
|
84
|
+
ensure
|
|
85
|
+
completion_queue << :done
|
|
86
|
+
end
|
|
64
87
|
end
|
|
88
|
+
|
|
89
|
+
download_attempts.times { completion_queue.pop }
|
|
90
|
+
raise error unless error.nil?
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
def get_opts(opts)
|
|
94
|
+
GET_OPTIONS.each_with_object({}) { |k, h| h[k] = opts[k] if opts.key?(k) }
|
|
65
95
|
end
|
|
66
96
|
|
|
67
|
-
def
|
|
68
|
-
|
|
97
|
+
def head_opts(opts)
|
|
98
|
+
HEAD_OPTIONS.each_with_object({}) { |k, h| h[k] = opts[k] if opts.key?(k) }
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def compute_chunk(chunk_size, file_size)
|
|
102
|
+
raise ArgumentError, ":chunk_size shouldn't exceed total file size." if chunk_size && chunk_size > file_size
|
|
103
|
+
|
|
104
|
+
chunk_size || [(file_size.to_f / MAX_PARTS).ceil, MIN_CHUNK_SIZE].max.to_i
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def compute_mode(file_size, total_parts, etag, opts)
|
|
108
|
+
chunk_size = compute_chunk(opts[:chunk_size], file_size)
|
|
109
|
+
part_size = (file_size.to_f / total_parts).ceil
|
|
110
|
+
|
|
111
|
+
resolve_temp_path(opts)
|
|
112
|
+
if chunk_size < part_size
|
|
113
|
+
multithreaded_get_by_ranges(file_size, etag, opts)
|
|
114
|
+
else
|
|
115
|
+
multithreaded_get_by_parts(total_parts, file_size, etag, opts)
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def extract_range(value)
|
|
120
|
+
value.match(%r{bytes (?<range>\d+-\d+)/\d+})[:range]
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def multipart_download(opts)
|
|
124
|
+
resp = @client.head_object(head_opts(opts[:params].merge(part_number: 1)))
|
|
69
125
|
count = resp.parts_count
|
|
126
|
+
|
|
70
127
|
if count.nil? || count <= 1
|
|
71
128
|
if resp.content_length <= MIN_CHUNK_SIZE
|
|
72
|
-
single_request
|
|
129
|
+
single_request(opts)
|
|
73
130
|
else
|
|
74
|
-
|
|
131
|
+
resolve_temp_path(opts)
|
|
132
|
+
multithreaded_get_by_ranges(resp.content_length, resp.etag, opts)
|
|
75
133
|
end
|
|
76
134
|
else
|
|
77
|
-
#
|
|
78
|
-
resp = @client.head_object(
|
|
135
|
+
# covers cases when given object is not uploaded via UploadPart API
|
|
136
|
+
resp = @client.head_object(head_opts(opts[:params])) # partNumber is an option
|
|
79
137
|
if resp.content_length <= MIN_CHUNK_SIZE
|
|
80
|
-
single_request
|
|
138
|
+
single_request(opts)
|
|
81
139
|
else
|
|
82
|
-
compute_mode(resp.content_length, count, resp.etag)
|
|
140
|
+
compute_mode(resp.content_length, count, resp.etag, opts)
|
|
83
141
|
end
|
|
84
142
|
end
|
|
85
143
|
end
|
|
86
144
|
|
|
87
|
-
def
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
multithreaded_get_by_ranges(file_size, etag)
|
|
92
|
-
else
|
|
93
|
-
multithreaded_get_by_parts(count, file_size, etag)
|
|
145
|
+
def multithreaded_get_by_parts(total_parts, file_size, etag, opts)
|
|
146
|
+
parts = (1..total_parts).map do |part|
|
|
147
|
+
params = get_opts(opts[:params].merge(part_number: part, if_match: etag))
|
|
148
|
+
Part.new(part_number: part, params: params)
|
|
94
149
|
end
|
|
150
|
+
download_with_executor(PartList.new(parts), file_size, opts)
|
|
95
151
|
end
|
|
96
152
|
|
|
97
|
-
def
|
|
153
|
+
def multithreaded_get_by_ranges(file_size, etag, opts)
|
|
98
154
|
offset = 0
|
|
99
|
-
default_chunk_size = compute_chunk(file_size)
|
|
155
|
+
default_chunk_size = compute_chunk(opts[:chunk_size], file_size)
|
|
100
156
|
chunks = []
|
|
157
|
+
part_number = 1 # parts start at 1
|
|
101
158
|
while offset < file_size
|
|
102
159
|
progress = offset + default_chunk_size
|
|
103
160
|
progress = file_size if progress > file_size
|
|
104
|
-
|
|
161
|
+
params = get_opts(opts[:params].merge(range: "bytes=#{offset}-#{progress - 1}", if_match: etag))
|
|
162
|
+
chunks << Part.new(part_number: part_number, size: (progress - offset), params: params)
|
|
163
|
+
part_number += 1
|
|
105
164
|
offset = progress
|
|
106
165
|
end
|
|
107
|
-
chunks
|
|
166
|
+
download_with_executor(PartList.new(chunks), file_size, opts)
|
|
108
167
|
end
|
|
109
168
|
|
|
110
|
-
def
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
169
|
+
def range_request(opts)
|
|
170
|
+
resp = @client.head_object(head_opts(opts[:params]))
|
|
171
|
+
resolve_temp_path(opts)
|
|
172
|
+
multithreaded_get_by_ranges(resp.content_length, resp.etag, opts)
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
def resolve_temp_path(opts)
|
|
176
|
+
return if [File, Tempfile].include?(opts[:destination].class)
|
|
177
|
+
|
|
178
|
+
opts[:temp_path] ||= "#{opts[:destination]}.s3tmp.#{SecureRandom.alphanumeric(8)}"
|
|
118
179
|
end
|
|
119
180
|
|
|
120
|
-
def
|
|
121
|
-
|
|
122
|
-
|
|
181
|
+
def single_request(opts)
|
|
182
|
+
params = get_opts(opts[:params]).merge(response_target: opts[:destination])
|
|
183
|
+
params[:on_chunk_received] = single_part_progress(opts) if opts[:progress_callback]
|
|
184
|
+
resp = @client.get_object(params)
|
|
185
|
+
return resp unless opts[:on_checksum_validated]
|
|
186
|
+
|
|
187
|
+
opts[:on_checksum_validated].call(resp.checksum_validated, resp) if resp.checksum_validated
|
|
188
|
+
resp
|
|
123
189
|
end
|
|
124
190
|
|
|
125
|
-
def
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
chunks = []
|
|
129
|
-
part_number = 1 # parts start at 1
|
|
130
|
-
while offset < file_size
|
|
131
|
-
progress = offset + default_chunk_size
|
|
132
|
-
progress = file_size if progress > file_size
|
|
133
|
-
range = "bytes=#{offset}-#{progress - 1}"
|
|
134
|
-
chunks << Part.new(
|
|
135
|
-
part_number: part_number,
|
|
136
|
-
size: (progress-offset),
|
|
137
|
-
params: @params.merge(range: range, if_match: etag)
|
|
138
|
-
)
|
|
139
|
-
part_number += 1
|
|
140
|
-
offset = progress
|
|
141
|
-
end
|
|
142
|
-
download_in_threads(PartList.new(chunks), file_size)
|
|
143
|
-
end
|
|
144
|
-
|
|
145
|
-
def multithreaded_get_by_parts(n_parts, total_size, etag)
|
|
146
|
-
parts = (1..n_parts).map do |part|
|
|
147
|
-
Part.new(part_number: part, params: @params.merge(part_number: part, if_match: etag))
|
|
148
|
-
end
|
|
149
|
-
download_in_threads(PartList.new(parts), total_size)
|
|
150
|
-
end
|
|
151
|
-
|
|
152
|
-
def download_in_threads(pending, total_size)
|
|
153
|
-
threads = []
|
|
154
|
-
progress = MultipartProgress.new(pending, total_size, @progress_callback) if @progress_callback
|
|
155
|
-
@thread_count.times do
|
|
156
|
-
thread = Thread.new do
|
|
157
|
-
begin
|
|
158
|
-
while part = pending.shift
|
|
159
|
-
if progress
|
|
160
|
-
part.params[:on_chunk_received] =
|
|
161
|
-
proc do |_chunk, bytes, total|
|
|
162
|
-
progress.call(part.part_number, bytes, total)
|
|
163
|
-
end
|
|
164
|
-
end
|
|
165
|
-
resp = @client.get_object(part.params)
|
|
166
|
-
write(resp)
|
|
167
|
-
if @on_checksum_validated && resp.checksum_validated
|
|
168
|
-
@on_checksum_validated.call(resp.checksum_validated, resp)
|
|
169
|
-
end
|
|
170
|
-
end
|
|
171
|
-
nil
|
|
172
|
-
rescue => error
|
|
173
|
-
# keep other threads from downloading other parts
|
|
174
|
-
pending.clear!
|
|
175
|
-
raise error
|
|
176
|
-
end
|
|
177
|
-
end
|
|
178
|
-
threads << thread
|
|
191
|
+
def single_part_progress(opts)
|
|
192
|
+
proc do |_chunk, bytes_read, total_size|
|
|
193
|
+
opts[:progress_callback].call([bytes_read], [total_size], total_size)
|
|
179
194
|
end
|
|
180
|
-
threads.map(&:value).compact
|
|
181
195
|
end
|
|
182
196
|
|
|
183
|
-
def
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
197
|
+
def update_progress(progress, part)
|
|
198
|
+
return unless progress.progress_callback
|
|
199
|
+
|
|
200
|
+
part.params[:on_chunk_received] =
|
|
201
|
+
proc do |_chunk, bytes, total|
|
|
202
|
+
progress.call(part.part_number, bytes, total)
|
|
203
|
+
end
|
|
187
204
|
end
|
|
188
205
|
|
|
189
|
-
def
|
|
190
|
-
|
|
191
|
-
params[:on_chunk_received] = single_part_progress if @progress_callback
|
|
192
|
-
resp = @client.get_object(params)
|
|
206
|
+
def execute_checksum_callback(resp, opts)
|
|
207
|
+
return unless opts[:on_checksum_validated] && resp.checksum_validated
|
|
193
208
|
|
|
194
|
-
|
|
209
|
+
opts[:on_checksum_validated].call(resp.checksum_validated, resp)
|
|
210
|
+
end
|
|
195
211
|
|
|
196
|
-
|
|
212
|
+
def validate_destination!(destination)
|
|
213
|
+
valid_types = [String, Pathname, File, Tempfile]
|
|
214
|
+
return if valid_types.include?(destination.class)
|
|
197
215
|
|
|
198
|
-
|
|
216
|
+
raise ArgumentError, "Invalid destination, expected #{valid_types.join(', ')} but got: #{destination.class}"
|
|
199
217
|
end
|
|
200
218
|
|
|
201
|
-
def
|
|
202
|
-
|
|
203
|
-
|
|
219
|
+
def validate_opts!(opts)
|
|
220
|
+
if opts[:on_checksum_validated] && !opts[:on_checksum_validated].respond_to?(:call)
|
|
221
|
+
raise ArgumentError, ':on_checksum_validated must be callable'
|
|
204
222
|
end
|
|
223
|
+
|
|
224
|
+
valid_modes = %w[auto get_range single_request]
|
|
225
|
+
unless valid_modes.include?(opts[:mode])
|
|
226
|
+
msg = "Invalid mode #{opts[:mode]} provided, :mode should be single_request, get_range or auto"
|
|
227
|
+
raise ArgumentError, msg
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
if opts[:mode] == 'get_range' && opts[:chunk_size].nil?
|
|
231
|
+
raise ArgumentError, 'In get_range mode, :chunk_size must be provided'
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
if opts[:chunk_size] && opts[:chunk_size] <= 0
|
|
235
|
+
raise ArgumentError, ':chunk_size must be positive'
|
|
236
|
+
end
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
def validate_range(actual, expected)
|
|
240
|
+
return if actual == expected.match(/bytes=(?<range>\d+-\d+)/)[:range]
|
|
241
|
+
|
|
242
|
+
raise MultipartDownloadError, "multipart download failed: expected range of #{expected} but got #{actual}"
|
|
205
243
|
end
|
|
206
244
|
|
|
245
|
+
def write(body, range, opts)
|
|
246
|
+
path = opts[:temp_path] || opts[:destination]
|
|
247
|
+
File.write(path, body.read, range.split('-').first.to_i)
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# @api private
|
|
207
251
|
class Part < Struct.new(:part_number, :size, :params)
|
|
208
252
|
include Aws::Structure
|
|
209
253
|
end
|
|
@@ -242,6 +286,8 @@ module Aws
|
|
|
242
286
|
@progress_callback = progress_callback
|
|
243
287
|
end
|
|
244
288
|
|
|
289
|
+
attr_reader :progress_callback
|
|
290
|
+
|
|
245
291
|
def call(part_number, bytes_received, total)
|
|
246
292
|
# part numbers start at 1
|
|
247
293
|
@bytes_received[part_number - 1] = bytes_received
|
|
@@ -7,23 +7,21 @@ module Aws
|
|
|
7
7
|
# @api private
|
|
8
8
|
class FileUploader
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
DEFAULT_MULTIPART_THRESHOLD = 100 * 1024 * 1024
|
|
11
11
|
|
|
12
12
|
# @param [Hash] options
|
|
13
13
|
# @option options [Client] :client
|
|
14
14
|
# @option options [Integer] :multipart_threshold (104857600)
|
|
15
15
|
def initialize(options = {})
|
|
16
|
-
@options = options
|
|
17
16
|
@client = options[:client] || Client.new
|
|
18
|
-
@
|
|
19
|
-
|
|
17
|
+
@executor = options[:executor]
|
|
18
|
+
@multipart_threshold = options[:multipart_threshold] || DEFAULT_MULTIPART_THRESHOLD
|
|
20
19
|
end
|
|
21
20
|
|
|
22
21
|
# @return [Client]
|
|
23
22
|
attr_reader :client
|
|
24
23
|
|
|
25
|
-
# @return [Integer] Files larger than or equal to this in bytes are uploaded
|
|
26
|
-
# using a {MultipartFileUploader}.
|
|
24
|
+
# @return [Integer] Files larger than or equal to this in bytes are uploaded using a {MultipartFileUploader}.
|
|
27
25
|
attr_reader :multipart_threshold
|
|
28
26
|
|
|
29
27
|
# @param [String, Pathname, File, Tempfile] source The file to upload.
|
|
@@ -38,11 +36,9 @@ module Aws
|
|
|
38
36
|
# @return [void]
|
|
39
37
|
def upload(source, options = {})
|
|
40
38
|
Aws::Plugins::UserAgent.metric('S3_TRANSFER') do
|
|
41
|
-
if File.size(source) >= multipart_threshold
|
|
42
|
-
MultipartFileUploader.new(@
|
|
39
|
+
if File.size(source) >= @multipart_threshold
|
|
40
|
+
MultipartFileUploader.new(client: @client, executor: @executor).upload(source, options)
|
|
43
41
|
else
|
|
44
|
-
# remove multipart parameters not supported by put_object
|
|
45
|
-
options.delete(:thread_count)
|
|
46
42
|
put_object(source, options)
|
|
47
43
|
end
|
|
48
44
|
end
|
|
@@ -50,9 +46,9 @@ module Aws
|
|
|
50
46
|
|
|
51
47
|
private
|
|
52
48
|
|
|
53
|
-
def open_file(source)
|
|
54
|
-
if String
|
|
55
|
-
File.open(source, 'rb'
|
|
49
|
+
def open_file(source, &block)
|
|
50
|
+
if source.is_a?(String) || source.is_a?(Pathname)
|
|
51
|
+
File.open(source, 'rb', &block)
|
|
56
52
|
else
|
|
57
53
|
yield(source)
|
|
58
54
|
end
|