aws-sdk-s3 1.199.1 → 1.200.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/VERSION +1 -1
- data/lib/aws-sdk-s3/client.rb +1 -1
- data/lib/aws-sdk-s3/customizations/object.rb +23 -19
- data/lib/aws-sdk-s3/customizations.rb +1 -0
- data/lib/aws-sdk-s3/default_executor.rb +103 -0
- data/lib/aws-sdk-s3/file_downloader.rb +175 -112
- data/lib/aws-sdk-s3/file_uploader.rb +6 -8
- data/lib/aws-sdk-s3/multipart_file_uploader.rb +61 -56
- data/lib/aws-sdk-s3/multipart_stream_uploader.rb +41 -44
- data/lib/aws-sdk-s3/transfer_manager.rb +77 -25
- data/lib/aws-sdk-s3.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 50d0c1be8e15a46bcc05e13aeba9bf9ee61addc0decd0c0480a7809a34f7726c
+  data.tar.gz: 5b0634d0d3efee830b742591722f62e34acf6f3c0ccbb50ffbbd8b51faacaa92
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6f4e85a59226b4b0cebe9ebdecccfcb7a147339992ed740105faf9f7f4423a02fa0d4cd18515341e9ec24a7626bbcde4f2daafa9796dcb6e3e1171bd15cafe7a
+  data.tar.gz: 6d76cb2b2cb4b4d0622ea54b8d7300a3eb3246209c9ee9e770888c71bef67fe55ae468c238f390916c3dcaf2b6a02ee10b786774bb6e7aab7cdeccb2e670e87e
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,13 @@
 Unreleased Changes
 ------------------
 
+1.200.0 (2025-10-15)
+------------------
+
+* Feature - Add lightweight thread pool executor for multipart `download_file`, `upload_file` and `upload_stream`.
+
+* Feature - Add custom executor support for `Aws::S3::TransferManager`.
+
 1.199.1 (2025-09-25)
 ------------------
 
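Taken together, the two features above change how concurrency is wired up in multipart transfers. A minimal sketch of the new surface, assuming placeholder bucket and key names (`concurrent-ruby` is only needed for the custom-executor variant):

    # Default: each operation builds a lightweight DefaultExecutor internally
    # and shuts it down when the operation finishes.
    tm = Aws::S3::TransferManager.new
    tm.upload_file('/tmp/report.csv', bucket: 'my-bucket', key: 'report.csv', thread_count: 8)

    # Custom executor: reused across operations; you own its lifecycle.
    require 'concurrent-ruby'
    pool = Concurrent::ThreadPoolExecutor.new(max_threads: 5)
    tm = Aws::S3::TransferManager.new(executor: pool)
    tm.download_file('/tmp/report.csv', bucket: 'my-bucket', key: 'report.csv')
    pool.shutdown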
data/VERSION
CHANGED
@@ -1 +1 @@
-1.199.1
+1.200.0
data/lib/aws-sdk-s3/client.rb
CHANGED
@@ -21735,7 +21735,7 @@ module Aws::S3
         tracer: tracer
       )
       context[:gem_name] = 'aws-sdk-s3'
-      context[:gem_version] = '1.199.1'
+      context[:gem_version] = '1.200.0'
       Seahorse::Client::Request.new(handlers, context)
     end
 
data/lib/aws-sdk-s3/customizations/object.rb
CHANGED
@@ -358,8 +358,8 @@ module Aws
       # {Client#complete_multipart_upload},
       # and {Client#upload_part} can be provided.
       #
-      # @option options [Integer] :thread_count (10) The number of parallel
-      #
+      # @option options [Integer] :thread_count (10) The number of parallel multipart uploads.
+      #   An additional thread is used internally for task coordination.
       #
       # @option options [Boolean] :tempfile (false) Normally read data is stored
       #   in memory when building the parts in order to complete the underlying
@@ -383,19 +383,18 @@ module Aws
       # @see Client#complete_multipart_upload
       # @see Client#upload_part
       def upload_stream(options = {}, &block)
-
+        upload_opts = options.merge(bucket: bucket_name, key: key)
+        executor = DefaultExecutor.new(max_threads: upload_opts.delete(:thread_count))
         uploader = MultipartStreamUploader.new(
           client: client,
-
-          tempfile:
-          part_size:
+          executor: executor,
+          tempfile: upload_opts.delete(:tempfile),
+          part_size: upload_opts.delete(:part_size)
         )
         Aws::Plugins::UserAgent.metric('RESOURCE_MODEL') do
-          uploader.upload(
-            uploading_options.merge(bucket: bucket_name, key: key),
-            &block
-          )
+          uploader.upload(upload_opts, &block)
         end
+        executor.shutdown
         true
       end
       deprecated(:upload_stream, use: 'Aws::S3::TransferManager#upload_stream', version: 'next major version')
@@ -458,12 +457,18 @@ module Aws
       # @see Client#complete_multipart_upload
       # @see Client#upload_part
       def upload_file(source, options = {})
-
-
+        upload_opts = options.merge(bucket: bucket_name, key: key)
+        executor = DefaultExecutor.new(max_threads: upload_opts.delete(:thread_count))
+        uploader = FileUploader.new(
+          client: client,
+          executor: executor,
+          multipart_threshold: upload_opts.delete(:multipart_threshold)
+        )
         response = Aws::Plugins::UserAgent.metric('RESOURCE_MODEL') do
-          uploader.upload(source,
+          uploader.upload(source, upload_opts)
         end
         yield response if block_given?
+        executor.shutdown
         true
       end
       deprecated(:upload_file, use: 'Aws::S3::TransferManager#upload_file', version: 'next major version')
@@ -512,10 +517,6 @@ module Aws
       #
       # @option options [Integer] :thread_count (10) Customize threads used in the multipart download.
       #
-      # @option options [String] :version_id The object version id used to retrieve the object.
-      #
-      # @see https://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectVersioning.html ObjectVersioning
-      #
       # @option options [String] :checksum_mode ("ENABLED")
       #   When `"ENABLED"` and the object has a stored checksum, it will be used to validate the download and will
       #   raise an `Aws::Errors::ChecksumError` if checksum validation fails. You may provide a `on_checksum_validated`
@@ -539,10 +540,13 @@ module Aws
       # @see Client#get_object
       # @see Client#head_object
       def download_file(destination, options = {})
-
+        download_opts = options.merge(bucket: bucket_name, key: key)
+        executor = DefaultExecutor.new(max_threads: download_opts.delete(:thread_count))
+        downloader = FileDownloader.new(client: client, executor: executor)
         Aws::Plugins::UserAgent.metric('RESOURCE_MODEL') do
-          downloader.download(destination,
+          downloader.download(destination, download_opts)
         end
+        executor.shutdown
         true
       end
       deprecated(:download_file, use: 'Aws::S3::TransferManager#download_file', version: 'next major version')
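As this diff shows, the resource-level helpers remain but are deprecated: each call now builds its own DefaultExecutor and shuts it down before returning, so no threads outlive the call. A hedged sketch with placeholder names:

    obj = Aws::S3::Resource.new.bucket('my-bucket').object('big-file')
    obj.upload_file('/tmp/big-file', thread_count: 5) # executor created and shut down internally
    obj.download_file('/tmp/big-file.copy')           # same per-call executor lifecycle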
data/lib/aws-sdk-s3/customizations.rb
CHANGED
@@ -7,6 +7,7 @@ module Aws
     autoload :Encryption, 'aws-sdk-s3/encryption'
     autoload :EncryptionV2, 'aws-sdk-s3/encryption_v2'
     autoload :FilePart, 'aws-sdk-s3/file_part'
+    autoload :DefaultExecutor, 'aws-sdk-s3/default_executor'
     autoload :FileUploader, 'aws-sdk-s3/file_uploader'
     autoload :FileDownloader, 'aws-sdk-s3/file_downloader'
     autoload :LegacySigner, 'aws-sdk-s3/legacy_signer'
data/lib/aws-sdk-s3/default_executor.rb
ADDED
@@ -0,0 +1,103 @@
+# frozen_string_literal: true
+
+module Aws
+  module S3
+    # @api private
+    class DefaultExecutor
+      DEFAULT_MAX_THREADS = 10
+      RUNNING = :running
+      SHUTTING_DOWN = :shutting_down
+      SHUTDOWN = :shutdown
+
+      def initialize(options = {})
+        @max_threads = options[:max_threads] || DEFAULT_MAX_THREADS
+        @state = RUNNING
+        @queue = Queue.new
+        @pool = []
+        @mutex = Mutex.new
+      end
+
+      # Submits a task for execution.
+      # @param [Object] args Variable number of arguments to pass to the block
+      # @param [Proc] block The block to be executed
+      # @return [Boolean] Returns true if the task was submitted successfully
+      def post(*args, &block)
+        @mutex.synchronize do
+          raise 'Executor has been shutdown and is no longer accepting tasks' unless @state == RUNNING
+
+          @queue << [args, block]
+          ensure_worker_available
+        end
+        true
+      end
+
+      # Immediately terminates all worker threads and clears pending tasks.
+      # This is a forceful shutdown that doesn't wait for running tasks to complete.
+      #
+      # @return [Boolean] true when termination is complete
+      def kill
+        @mutex.synchronize do
+          @state = SHUTDOWN
+          @pool.each(&:kill)
+          @pool.clear
+          @queue.clear
+        end
+        true
+      end
+
+      # Gracefully shuts down the executor, optionally with a timeout.
+      # Stops accepting new tasks and waits for running tasks to complete.
+      #
+      # @param timeout [Numeric, nil] Maximum time in seconds to wait for shutdown.
+      #   If nil, waits indefinitely. If timeout expires, remaining threads are killed.
+      # @return [Boolean] true when shutdown is complete
+      def shutdown(timeout = nil)
+        @mutex.synchronize do
+          return true if @state == SHUTDOWN
+
+          @state = SHUTTING_DOWN
+          @pool.size.times { @queue << :shutdown }
+        end
+
+        if timeout
+          deadline = Time.now + timeout
+          @pool.each do |thread|
+            remaining = deadline - Time.now
+            break if remaining <= 0
+
+            thread.join([remaining, 0].max)
+          end
+          @pool.select(&:alive?).each(&:kill)
+        else
+          @pool.each(&:join)
+        end
+
+        @mutex.synchronize do
+          @pool.clear
+          @state = SHUTDOWN
+        end
+        true
+      end
+
+      private
+
+      def ensure_worker_available
+        return unless @state == RUNNING
+
+        @pool.select!(&:alive?)
+        @pool << spawn_worker if @pool.size < @max_threads
+      end
+
+      def spawn_worker
+        Thread.new do
+          while (job = @queue.shift)
+            break if job == :shutdown
+
+            args, block = job
+            block.call(*args)
+          end
+        end
+      end
+    end
+  end
+end
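Although DefaultExecutor is marked `@api private`, its shape above is the contract custom executors must match: `post` enqueues `[args, block]` and lazily spawns up to `max_threads` workers, while `shutdown` pushes one `:shutdown` token per worker onto the same queue, so already-queued tasks drain before the workers exit. A small usage sketch under those semantics:

    executor = Aws::S3::DefaultExecutor.new(max_threads: 4)
    results = Queue.new
    10.times { |i| executor.post(i) { |n| results << n * n } } # runs on up to 4 worker threads
    executor.shutdown # drains the queue, then joins the workers
    results.size # => 10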
data/lib/aws-sdk-s3/file_downloader.rb
CHANGED
@@ -8,184 +8,245 @@ module Aws
   module S3
     # @api private
     class FileDownloader
-
       MIN_CHUNK_SIZE = 5 * 1024 * 1024
       MAX_PARTS = 10_000
+      HEAD_OPTIONS = Set.new(Client.api.operation(:head_object).input.shape.member_names)
+      GET_OPTIONS = Set.new(Client.api.operation(:get_object).input.shape.member_names)
 
       def initialize(options = {})
         @client = options[:client] || Client.new
+        @executor = options[:executor]
       end
 
       # @return [Client]
       attr_reader :client
 
       def download(destination, options = {})
-
-
-
-        end
-
-        @destination = destination
-        @mode = options.delete(:mode) || 'auto'
-        @thread_count = options.delete(:thread_count) || 10
-        @chunk_size = options.delete(:chunk_size)
-        @on_checksum_validated = options.delete(:on_checksum_validated)
-        @progress_callback = options.delete(:progress_callback)
-        @params = options
-        validate!
+        validate_destination!(destination)
+        opts = build_download_opts(destination, options)
+        validate_opts!(opts)
 
         Aws::Plugins::UserAgent.metric('S3_TRANSFER') do
-          case @mode
-          when 'auto' then multipart_download
-          when 'single_request' then single_request
-          when 'get_range'
-            raise ArgumentError, 'In get_range mode, :chunk_size must be provided' unless @chunk_size
-
-            resp = @client.head_object(@params)
-            multithreaded_get_by_ranges(resp.content_length, resp.etag)
-          else
-            raise ArgumentError, "Invalid mode #{@mode} provided, :mode should be single_request, get_range or auto"
+          case opts[:mode]
+          when 'auto' then multipart_download(opts)
+          when 'single_request' then single_request(opts)
+          when 'get_range' then range_request(opts)
           end
         end
-        File.rename(
+        File.rename(opts[:temp_path], destination) if opts[:temp_path]
       ensure
-
+        cleanup_temp_file(opts)
       end
 
       private
 
-      def
-
+      def build_download_opts(destination, opts)
+        {
+          destination: destination,
+          mode: opts.delete(:mode) || 'auto',
+          chunk_size: opts.delete(:chunk_size),
+          on_checksum_validated: opts.delete(:on_checksum_validated),
+          progress_callback: opts.delete(:progress_callback),
+          params: opts,
+          temp_path: nil
+        }
+      end
+
+      def cleanup_temp_file(opts)
+        return unless opts
+
+        temp_file = opts[:temp_path]
+        File.delete(temp_file) if temp_file && File.exist?(temp_file)
+      end
+
+      def download_with_executor(part_list, total_size, opts)
+        download_attempts = 0
+        completion_queue = Queue.new
+        abort_download = false
+        error = nil
+        progress = MultipartProgress.new(part_list, total_size, opts[:progress_callback])
+
+        while (part = part_list.shift)
+          break if abort_download
+
+          download_attempts += 1
+          @executor.post(part) do |p|
+            update_progress(progress, p)
+            resp = @client.get_object(p.params)
+            range = extract_range(resp.content_range)
+            validate_range(range, p.params[:range]) if p.params[:range]
+            write(resp.body, range, opts)
+
+            execute_checksum_callback(resp, opts)
+          rescue StandardError => e
+            abort_download = true
+            error = e
+          ensure
+            completion_queue << :done
+          end
+        end
+
+        download_attempts.times { completion_queue.pop }
+        raise error unless error.nil?
+      end
+
+      def get_opts(opts)
+        GET_OPTIONS.each_with_object({}) { |k, h| h[k] = opts[k] if opts.key?(k) }
+      end
+
+      def head_opts(opts)
+        HEAD_OPTIONS.each_with_object({}) { |k, h| h[k] = opts[k] if opts.key?(k) }
+      end
+
+      def compute_chunk(chunk_size, file_size)
+        raise ArgumentError, ":chunk_size shouldn't exceed total file size." if chunk_size && chunk_size > file_size
 
-
+        chunk_size || [(file_size.to_f / MAX_PARTS).ceil, MIN_CHUNK_SIZE].max.to_i
       end
 
-      def
-
+      def compute_mode(file_size, total_parts, etag, opts)
+        chunk_size = compute_chunk(opts[:chunk_size], file_size)
+        part_size = (file_size.to_f / total_parts).ceil
+
+        resolve_temp_path(opts)
+        if chunk_size < part_size
+          multithreaded_get_by_ranges(file_size, etag, opts)
+        else
+          multithreaded_get_by_parts(total_parts, file_size, etag, opts)
+        end
+      end
+
+      def extract_range(value)
+        value.match(%r{bytes (?<range>\d+-\d+)/\d+})[:range]
+      end
+
+      def multipart_download(opts)
+        resp = @client.head_object(head_opts(opts[:params].merge(part_number: 1)))
         count = resp.parts_count
 
         if count.nil? || count <= 1
           if resp.content_length <= MIN_CHUNK_SIZE
-            single_request
+            single_request(opts)
           else
-
+            resolve_temp_path(opts)
+            multithreaded_get_by_ranges(resp.content_length, resp.etag, opts)
           end
         else
           # covers cases when given object is not uploaded via UploadPart API
-          resp = @client.head_object(
+          resp = @client.head_object(head_opts(opts[:params])) # partNumber is an option
           if resp.content_length <= MIN_CHUNK_SIZE
-            single_request
+            single_request(opts)
           else
-            compute_mode(resp.content_length, count, resp.etag)
+            compute_mode(resp.content_length, count, resp.etag, opts)
          end
        end
      end
 
-      def
-
-
-
-          multithreaded_get_by_ranges(file_size, etag)
-        else
-          multithreaded_get_by_parts(count, file_size, etag)
+      def multithreaded_get_by_parts(total_parts, file_size, etag, opts)
+        parts = (1..total_parts).map do |part|
+          params = get_opts(opts[:params].merge(part_number: part, if_match: etag))
+          Part.new(part_number: part, params: params)
         end
+        download_with_executor(PartList.new(parts), file_size, opts)
       end
 
-      def
-        raise ArgumentError, ":chunk_size shouldn't exceed total file size." if @chunk_size && @chunk_size > file_size
-
-        @chunk_size || [(file_size.to_f / MAX_PARTS).ceil, MIN_CHUNK_SIZE].max.to_i
-      end
-
-      def multithreaded_get_by_ranges(file_size, etag)
+      def multithreaded_get_by_ranges(file_size, etag, opts)
         offset = 0
-        default_chunk_size = compute_chunk(file_size)
+        default_chunk_size = compute_chunk(opts[:chunk_size], file_size)
         chunks = []
         part_number = 1 # parts start at 1
         while offset < file_size
           progress = offset + default_chunk_size
           progress = file_size if progress > file_size
-          params =
+          params = get_opts(opts[:params].merge(range: "bytes=#{offset}-#{progress - 1}", if_match: etag))
           chunks << Part.new(part_number: part_number, size: (progress - offset), params: params)
           part_number += 1
           offset = progress
         end
-
-      end
-
-      def multithreaded_get_by_parts(n_parts, total_size, etag)
-        parts = (1..n_parts).map do |part|
-          Part.new(part_number: part, params: @params.merge(part_number: part, if_match: etag))
-        end
-        download_in_threads(PartList.new(parts), total_size)
-      end
-
-      def download_in_threads(pending, total_size)
-        threads = []
-        progress = MultipartProgress.new(pending, total_size, @progress_callback) if @progress_callback
-        unless [File, Tempfile].include?(@destination.class)
-          @temp_path = "#{@destination}.s3tmp.#{SecureRandom.alphanumeric(8)}"
-        end
-        @thread_count.times do
-          thread = Thread.new do
-            begin
-              while (part = pending.shift)
-                if progress
-                  part.params[:on_chunk_received] =
-                    proc do |_chunk, bytes, total|
-                      progress.call(part.part_number, bytes, total)
-                    end
-                end
-                resp = @client.get_object(part.params)
-                range = extract_range(resp.content_range)
-                validate_range(range, part.params[:range]) if part.params[:range]
-                write(resp.body, range)
-                if @on_checksum_validated && resp.checksum_validated
-                  @on_checksum_validated.call(resp.checksum_validated, resp)
-                end
-              end
-              nil
-            rescue StandardError => e
-              pending.clear! # keep other threads from downloading other parts
-              raise e
-            end
-          end
-          threads << thread
-        end
-        threads.map(&:value).compact
+        download_with_executor(PartList.new(chunks), file_size, opts)
       end
 
-      def
-
+      def range_request(opts)
+        resp = @client.head_object(head_opts(opts[:params]))
+        resolve_temp_path(opts)
+        multithreaded_get_by_ranges(resp.content_length, resp.etag, opts)
       end
 
-      def
-        return if
-
-        raise MultipartDownloadError, "multipart download failed: expected range of #{expected} but got #{actual}"
-      end
+      def resolve_temp_path(opts)
+        return if [File, Tempfile].include?(opts[:destination].class)
 
-
-        path = @temp_path || @destination
-        File.write(path, body.read, range.split('-').first.to_i)
+        opts[:temp_path] ||= "#{opts[:destination]}.s3tmp.#{SecureRandom.alphanumeric(8)}"
       end
 
-      def single_request
-        params =
-        params[:on_chunk_received] = single_part_progress if
+      def single_request(opts)
+        params = get_opts(opts[:params]).merge(response_target: opts[:destination])
+        params[:on_chunk_received] = single_part_progress(opts) if opts[:progress_callback]
         resp = @client.get_object(params)
-        return resp unless
+        return resp unless opts[:on_checksum_validated]
 
-
+        opts[:on_checksum_validated].call(resp.checksum_validated, resp) if resp.checksum_validated
         resp
       end
 
-      def single_part_progress
+      def single_part_progress(opts)
         proc do |_chunk, bytes_read, total_size|
-
+          opts[:progress_callback].call([bytes_read], [total_size], total_size)
         end
       end
 
+      def update_progress(progress, part)
+        return unless progress.progress_callback
+
+        part.params[:on_chunk_received] =
+          proc do |_chunk, bytes, total|
+            progress.call(part.part_number, bytes, total)
+          end
+      end
+
+      def execute_checksum_callback(resp, opts)
+        return unless opts[:on_checksum_validated] && resp.checksum_validated
+
+        opts[:on_checksum_validated].call(resp.checksum_validated, resp)
+      end
+
+      def validate_destination!(destination)
+        valid_types = [String, Pathname, File, Tempfile]
+        return if valid_types.include?(destination.class)
+
+        raise ArgumentError, "Invalid destination, expected #{valid_types.join(', ')} but got: #{destination.class}"
+      end
+
+      def validate_opts!(opts)
+        if opts[:on_checksum_validated] && !opts[:on_checksum_validated].respond_to?(:call)
+          raise ArgumentError, ':on_checksum_validated must be callable'
+        end
+
+        valid_modes = %w[auto get_range single_request]
+        unless valid_modes.include?(opts[:mode])
+          msg = "Invalid mode #{opts[:mode]} provided, :mode should be single_request, get_range or auto"
+          raise ArgumentError, msg
+        end
+
+        if opts[:mode] == 'get_range' && opts[:chunk_size].nil?
+          raise ArgumentError, 'In get_range mode, :chunk_size must be provided'
+        end
+
+        if opts[:chunk_size] && opts[:chunk_size] <= 0
+          raise ArgumentError, ':chunk_size must be positive'
+        end
+      end
+
+      def validate_range(actual, expected)
+        return if actual == expected.match(/bytes=(?<range>\d+-\d+)/)[:range]
+
+        raise MultipartDownloadError, "multipart download failed: expected range of #{expected} but got #{actual}"
+      end
+
+      def write(body, range, opts)
+        path = opts[:temp_path] || opts[:destination]
+        File.write(path, body.read, range.split('-').first.to_i)
+      end
+
       # @api private
       class Part < Struct.new(:part_number, :size, :params)
         include Aws::Structure
@@ -225,6 +286,8 @@ module Aws
         @progress_callback = progress_callback
       end
 
+      attr_reader :progress_callback
+
       def call(part_number, bytes_received, total)
         # part numbers start at 1
         @bytes_received[part_number - 1] = bytes_received
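One pattern worth calling out in `download_with_executor` above: rather than joining threads, the downloader counts submissions and blocks on a completion queue, and the `:done` signal sits in an `ensure` block so a failed part still releases the waiter. A stripped-down sketch of that fan-out/fan-in idiom (`executor`, `parts` and `process` are stand-ins, not SDK names):

    queue = Queue.new
    submitted = 0
    parts.each do |part|
      submitted += 1
      executor.post(part) do |p|
        process(p) # may raise; the caller records the first error and stops submitting
      ensure
        queue << :done # always signal, success or failure
      end
    end
    submitted.times { queue.pop } # wait until every submitted task has signaled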
data/lib/aws-sdk-s3/file_uploader.rb
CHANGED
@@ -13,8 +13,8 @@ module Aws
     # @option options [Client] :client
     # @option options [Integer] :multipart_threshold (104857600)
     def initialize(options = {})
-      @options = options
       @client = options[:client] || Client.new
+      @executor = options[:executor]
       @multipart_threshold = options[:multipart_threshold] || DEFAULT_MULTIPART_THRESHOLD
     end
 
@@ -36,11 +36,9 @@ module Aws
     # @return [void]
     def upload(source, options = {})
       Aws::Plugins::UserAgent.metric('S3_TRANSFER') do
-        if File.size(source) >= multipart_threshold
-          MultipartFileUploader.new(@
+        if File.size(source) >= @multipart_threshold
+          MultipartFileUploader.new(client: @client, executor: @executor).upload(source, options)
         else
-          # remove multipart parameters not supported by put_object
-          options.delete(:thread_count)
           put_object(source, options)
         end
       end
@@ -48,9 +46,9 @@ module Aws
 
     private
 
-    def open_file(source)
-      if String
-        File.open(source, 'rb'
+    def open_file(source, &block)
+      if source.is_a?(String) || source.is_a?(Pathname)
+        File.open(source, 'rb', &block)
       else
         yield(source)
       end
data/lib/aws-sdk-s3/multipart_file_uploader.rb
CHANGED
@@ -7,10 +7,8 @@ module Aws
   module S3
     # @api private
    class MultipartFileUploader
-
      MIN_PART_SIZE = 5 * 1024 * 1024 # 5MB
      MAX_PARTS = 10_000
-      DEFAULT_THREAD_COUNT = 10
      CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
      COMPLETE_OPTIONS = Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names)
      UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names)
@@ -21,10 +19,9 @@ module Aws
      )
 
      # @option options [Client] :client
-      # @option options [Integer] :thread_count (DEFAULT_THREAD_COUNT)
      def initialize(options = {})
        @client = options[:client] || Client.new
-        @
+        @executor = options[:executor]
      end
 
      # @return [Client]
@@ -38,11 +35,12 @@ module Aws
      # It will be invoked with [bytes_read], [total_sizes]
      # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse
      def upload(source, options = {})
-
+        file_size = File.size(source)
+        raise ArgumentError, 'unable to multipart upload files smaller than 5MB' if file_size < MIN_PART_SIZE
 
        upload_id = initiate_upload(options)
-        parts = upload_parts(upload_id, source, options)
-        complete_upload(upload_id, parts,
+        parts = upload_parts(upload_id, source, file_size, options)
+        complete_upload(upload_id, parts, file_size, options)
      end
 
      private
@@ -51,22 +49,22 @@ module Aws
        @client.create_multipart_upload(create_opts(options)).upload_id
      end
 
-      def complete_upload(upload_id, parts,
+      def complete_upload(upload_id, parts, file_size, options)
        @client.complete_multipart_upload(
          **complete_opts(options).merge(
            upload_id: upload_id,
            multipart_upload: { parts: parts },
-            mpu_object_size:
+            mpu_object_size: file_size
          )
        )
      rescue StandardError => e
        abort_upload(upload_id, options, [e])
      end
 
-      def upload_parts(upload_id, source, options)
+      def upload_parts(upload_id, source, file_size, options)
        completed = PartList.new
-        pending = PartList.new(compute_parts(upload_id, source, options))
-        errors =
+        pending = PartList.new(compute_parts(upload_id, source, file_size, options))
+        errors = upload_with_executor(pending, completed, options)
        if errors.empty?
          completed.to_a.sort_by { |part| part[:part_number] }
        else
@@ -86,17 +84,20 @@ module Aws
        raise MultipartUploadError.new(msg, errors + [e])
      end
 
-      def compute_parts(upload_id, source, options)
-
-        default_part_size = compute_default_part_size(size)
+      def compute_parts(upload_id, source, file_size, options)
+        default_part_size = compute_default_part_size(file_size)
        offset = 0
        part_number = 1
        parts = []
-        while offset <
+        while offset < file_size
          parts << upload_part_opts(options).merge(
            upload_id: upload_id,
            part_number: part_number,
-            body: FilePart.new(
+            body: FilePart.new(
+              source: source,
+              offset: offset,
+              size: part_size(file_size, default_part_size, offset)
+            )
          )
          part_number += 1
          offset += default_part_size
@@ -115,17 +116,13 @@ module Aws
      def create_opts(options)
        opts = { checksum_algorithm: Aws::Plugins::ChecksumAlgorithm::DEFAULT_CHECKSUM }
        opts[:checksum_type] = 'FULL_OBJECT' if has_checksum_key?(options.keys)
-        CREATE_OPTIONS.each_with_object(opts)
-          hash[key] = options[key] if options.key?(key)
-        end
+        CREATE_OPTIONS.each_with_object(opts) { |k, h| h[k] = options[k] if options.key?(k) }
      end
 
      def complete_opts(options)
        opts = {}
        opts[:checksum_type] = 'FULL_OBJECT' if has_checksum_key?(options.keys)
-        COMPLETE_OPTIONS.each_with_object(opts)
-          hash[key] = options[key] if options.key?(key)
-        end
+        COMPLETE_OPTIONS.each_with_object(opts) { |k, h| h[k] = options[k] if options.key?(k) }
      end
 
      def upload_part_opts(options)
@@ -135,43 +132,40 @@ module Aws
        end
      end
 
-      def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-          # keep other threads from uploading other parts
-          pending.clear!
-          e
-        end
+      def upload_with_executor(pending, completed, options)
+        upload_attempts = 0
+        completion_queue = Queue.new
+        abort_upload = false
+        errors = []
+        progress = MultipartProgress.new(pending, options[:progress_callback])
+
+        while (part = pending.shift)
+          break if abort_upload
+
+          upload_attempts += 1
+          @executor.post(part) do |p|
+            update_progress(progress, p)
+            resp = @client.upload_part(p)
+            p[:body].close
+            completed_part = { etag: resp.etag, part_number: p[:part_number] }
+            algorithm = resp.context.params[:checksum_algorithm].downcase
+            k = "checksum_#{algorithm}".to_sym
+            completed_part[k] = resp.send(k)
+            completed.push(completed_part)
+          rescue StandardError => e
+            abort_upload = true
+            errors << e
+          ensure
+            completion_queue << :done
          end
-          threads << thread
        end
-
+
+        upload_attempts.times { completion_queue.pop }
+        errors
      end
 
-      def compute_default_part_size(
-        [(
+      def compute_default_part_size(file_size)
+        [(file_size.to_f / MAX_PARTS).ceil, MIN_PART_SIZE].max.to_i
      end
 
      def part_size(total_size, part_size, offset)
@@ -182,6 +176,15 @@ module Aws
        end
      end
 
+      def update_progress(progress, part)
+        return unless progress.progress_callback
+
+        part[:on_chunk_sent] =
+          proc do |_chunk, bytes, _total|
+            progress.call(part[:part_number], bytes)
+          end
+      end
+
      # @api private
      class PartList
        def initialize(parts = [])
@@ -222,6 +225,8 @@ module Aws
        @progress_callback = progress_callback
      end
 
+      attr_reader :progress_callback
+
      def call(part_number, bytes_read)
        # part numbers start at 1
        @bytes_sent[part_number - 1] = bytes_read
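The progress plumbing above attaches an `:on_chunk_sent` proc to each part and funnels byte counts through `MultipartProgress`. From the caller's side this surfaces as the `:progress_callback` option, invoked with arrays of per-part bytes sent and part sizes; a hedged sketch with placeholder names:

    progress = proc do |bytes_sent, part_sizes|
      pct = 100.0 * bytes_sent.sum / part_sizes.sum
      print "\rupload: #{pct.round(1)}%"
    end
    tm.upload_file('/tmp/big-file', bucket: 'my-bucket', key: 'big-file', progress_callback: progress)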
data/lib/aws-sdk-s3/multipart_stream_uploader.rb
CHANGED
@@ -11,7 +11,6 @@ module Aws
    class MultipartStreamUploader
 
      DEFAULT_PART_SIZE = 5 * 1024 * 1024 # 5MB
-      DEFAULT_THREAD_COUNT = 10
      CREATE_OPTIONS = Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
      UPLOAD_PART_OPTIONS = Set.new(Client.api.operation(:upload_part).input.shape.member_names)
      COMPLETE_UPLOAD_OPTIONS = Set.new(Client.api.operation(:complete_multipart_upload).input.shape.member_names)
@@ -19,9 +18,9 @@ module Aws
      # @option options [Client] :client
      def initialize(options = {})
        @client = options[:client] || Client.new
+        @executor = options[:executor]
        @tempfile = options[:tempfile]
        @part_size = options[:part_size] || DEFAULT_PART_SIZE
-        @thread_count = options[:thread_count] || DEFAULT_THREAD_COUNT
      end
 
      # @return [Client]
@@ -29,7 +28,6 @@ module Aws
 
      # @option options [required,String] :bucket
      # @option options [required,String] :key
-      # @option options [Integer] :thread_count (DEFAULT_THREAD_COUNT)
      # @return [Seahorse::Client::Response] - the CompleteMultipartUploadResponse
      def upload(options = {}, &block)
        Aws::Plugins::UserAgent.metric('S3_TRANSFER') do
@@ -54,28 +52,30 @@ module Aws
      end
 
      def upload_parts(upload_id, options, &block)
-
-
-
+        completed_parts = Queue.new
+        errors = []
+
+        begin
          IO.pipe do |read_pipe, write_pipe|
-
-
-
-
-
-
-
-            block.call(write_pipe)
-          ensure
-            # Ensure the pipe is closed to avoid https://github.com/jruby/jruby/issues/6111
-            write_pipe.close
+            upload_thread = Thread.new do
+              upload_with_executor(
+                read_pipe,
+                completed_parts,
+                errors,
+                upload_part_opts(options).merge(upload_id: upload_id)
+              )
            end
-
+
+            block.call(write_pipe)
+          ensure
+            # Ensure the pipe is closed to avoid https://github.com/jruby/jruby/issues/6111
+            write_pipe.close
+            upload_thread.join
          end
        rescue StandardError => e
-
+          errors << e
        end
-        return ordered_parts(
+        return ordered_parts(completed_parts) if errors.empty?
 
        abort_upload(upload_id, options, errors)
      end
@@ -128,37 +128,34 @@ module Aws
        end
      end
 
-      def
-
+      def upload_with_executor(read_pipe, completed, errors, options)
+        completion_queue = Queue.new
+        queued_parts = 0
        part_number = 0
-
-
-
-
-
-
-
-
-
-
-
-
-
-        ensure
-          clear_body(body)
-        end
-        end
-        nil
+        mutex = Mutex.new
+        loop do
+          part_body, current_part_num = mutex.synchronize do
+            [read_to_part_body(read_pipe), part_number += 1]
+          end
+          break unless part_body || current_part_num == 1
+
+          queued_parts += 1
+          @executor.post(part_body, current_part_num, options) do |body, num, opts|
+            part = opts.merge(body: body, part_number: num)
+            resp = @client.upload_part(part)
+            completed_part = create_completed_part(resp, part)
+            completed.push(completed_part)
          rescue StandardError => e
-            # keep other threads from uploading other parts
            mutex.synchronize do
-
+              errors.push(e)
              read_pipe.close_read unless read_pipe.closed?
            end
-
+          ensure
+            clear_body(body)
+            completion_queue << :done
          end
-          thread
        end
+        queued_parts.times { completion_queue.pop }
      end
 
      def create_completed_part(resp, part)
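The rewritten `upload_parts` above keeps the caller's block writing into an `IO.pipe`, but part uploads are now posted to the executor while a dedicated coordination thread owns the read end — the extra internal thread the `:thread_count` documentation mentions. Usage is unchanged; a hedged sketch with placeholder names:

    tm.upload_stream(bucket: 'my-bucket', key: 'streamed-object') do |write_stream|
      IO.copy_stream($stdin, write_stream) # whatever is written here is chunked into parts
    end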
data/lib/aws-sdk-s3/transfer_manager.rb
CHANGED
@@ -2,27 +2,74 @@
 
 module Aws
   module S3
-    # A high-level S3 transfer utility that provides enhanced upload and download
-    #
-    # handling of large files. The following features are supported:
+    # A high-level S3 transfer utility that provides enhanced upload and download capabilities with automatic
+    # multipart handling, progress tracking, and handling of large files. The following features are supported:
     #
     # * upload a file with multipart upload
     # * upload a stream with multipart upload
     # * download a S3 object with multipart download
     # * track transfer progress by using progress listener
     #
+    # ## Executor Management
+    # TransferManager uses executors to handle concurrent operations during multipart transfers. You can control
+    # concurrency behavior by providing a custom executor or relying on the default executor management.
+    #
+    # ### Default Behavior
+    # When no `:executor` is provided, TransferManager creates a new DefaultExecutor for each individual
+    # operation (`download_file`, `upload_file`, etc.) and automatically shuts it down when that operation completes.
+    # Each operation gets its own isolated thread pool with the specified `:thread_count` (default 10 threads).
+    #
+    # ### Custom Executor
+    # You can provide your own executor (e.g., `Concurrent::ThreadPoolExecutor`) for fine-grained control over thread
+    # pools and resource management. When using a custom executor, you are responsible for shutting it down
+    # when finished. The executor may be reused across multiple TransferManager operations.
+    #
+    # Custom executors must implement the same interface as DefaultExecutor.
+    #
+    # **Required methods:**
+    #
+    # * `post(*args, &block)` - Execute a task with given arguments and block
+    # * `kill` - Immediately terminate all running tasks
+    #
+    # **Optional methods:**
+    #
+    # * `shutdown(timeout = nil)` - Gracefully shutdown the executor with optional timeout
+    #
+    # @example Using default executor (automatic creation and shutdown)
+    #   tm = TransferManager.new # No executor provided
+    #   # DefaultExecutor created, used, and shutdown automatically
+    #   tm.download_file('/path/to/file', bucket: 'bucket', key: 'key')
+    #
+    # @example Using custom executor (manual shutdown required)
+    #   require 'concurrent-ruby'
+    #
+    #   executor = Concurrent::ThreadPoolExecutor.new(max_threads: 5)
+    #   tm = TransferManager.new(executor: executor)
+    #   tm.download_file('/path/to/file1', bucket: 'bucket', key: 'key1')
+    #   executor.shutdown # You must shutdown custom executors
+    #
     class TransferManager
+
       # @param [Hash] options
       # @option options [S3::Client] :client (S3::Client.new)
       #   The S3 client to use for {TransferManager} operations. If not provided, a new default client
       #   will be created automatically.
+      # @option options [Object] :executor
+      #   The executor to use for multipart operations. Must implement the same interface as {DefaultExecutor}.
+      #   If not provided, a new {DefaultExecutor} will be created automatically for each operation and
+      #   shutdown after completion. When provided a custom executor, it will be reused across operations, and
+      #   you are responsible for shutting it down when finished.
      def initialize(options = {})
-        @client = options
+        @client = options[:client] || Client.new
+        @executor = options[:executor]
      end
 
      # @return [S3::Client]
      attr_reader :client
 
+      # @return [Object]
+      attr_reader :executor
+
      # Downloads a file in S3 to a path on disk.
      #
      # # small files (< 5MB) are downloaded in a single API call
@@ -74,10 +121,7 @@
      # @option options [Integer] :chunk_size required in `"get_range"` mode.
      #
      # @option options [Integer] :thread_count (10) Customize threads used in the multipart download.
-      #
-      # @option options [String] :version_id The object version id used to retrieve the object.
-      #
-      # @see https://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectVersioning.html ObjectVersioning
+      #   Only used when no custom executor is provided (creates {DefaultExecutor} with given thread count).
      #
      # @option options [String] :checksum_mode ("ENABLED")
      #   When `"ENABLED"` and the object has a stored checksum, it will be used to validate the download and will
@@ -102,8 +146,11 @@
      # @see Client#get_object
      # @see Client#head_object
      def download_file(destination, bucket:, key:, **options)
-
-
+        download_opts = options.merge(bucket: bucket, key: key)
+        executor = @executor || DefaultExecutor.new(max_threads: download_opts.delete(:thread_count))
+        downloader = FileDownloader.new(client: @client, executor: executor)
+        downloader.download(destination, download_opts)
+        executor.shutdown unless @executor
        true
      end
 
@@ -139,7 +186,7 @@
      #   A file on the local file system that will be uploaded. This can either be a `String` or `Pathname` to the
      #   file, an open `File` object, or an open `Tempfile` object. If you pass an open `File` or `Tempfile` object,
      #   then you are responsible for closing it after the upload completes. When using an open Tempfile, rewind it
-      #   before
+      #   before uploading or else the object will be empty.
      #
      # @param [String] bucket
      #   The name of the S3 bucket to upload to.
@@ -156,15 +203,14 @@
      #   Files larger han or equal to `:multipart_threshold` are uploaded using the S3 multipart upload APIs.
      #   Default threshold is `100MB`.
      #
-      # @option options [Integer] :thread_count (10)
-      #
-      #   `:multipart_threshold`.
+      # @option options [Integer] :thread_count (10) Customize threads used in the multipart upload.
+      #   Only used when no custom executor is provided (creates {DefaultExecutor} with the given thread count).
      #
      # @option options [Proc] :progress_callback (nil)
      #   A Proc that will be called when each chunk of the upload is sent.
      #   It will be invoked with `[bytes_read]` and `[total_sizes]`.
      #
-      # @raise [MultipartUploadError] If
+      # @raise [MultipartUploadError] If a file is being uploaded in parts, and the upload can not be completed,
      #   then the upload is aborted and this error is raised. The raised error has a `#errors` method that
      #   returns the failures that caused the upload to be aborted.
      #
@@ -175,13 +221,16 @@
      # @see Client#complete_multipart_upload
      # @see Client#upload_part
      def upload_file(source, bucket:, key:, **options)
-
+        upload_opts = options.merge(bucket: bucket, key: key)
+        executor = @executor || DefaultExecutor.new(max_threads: upload_opts.delete(:thread_count))
        uploader = FileUploader.new(
-          multipart_threshold:
-          client: @client
+          multipart_threshold: upload_opts.delete(:multipart_threshold),
+          client: @client,
+          executor: executor
        )
-        response = uploader.upload(source,
+        response = uploader.upload(source, upload_opts)
        yield response if block_given?
+        executor.shutdown unless @executor
        true
      end
 
@@ -217,7 +266,8 @@
      #   {Client#upload_part} can be provided.
      #
      # @option options [Integer] :thread_count (10)
-      #   The number of parallel multipart uploads.
+      #   The number of parallel multipart uploads. Only used when no custom executor is provided (creates
+      #   {DefaultExecutor} with the given thread count). An additional thread is used internally for task coordination.
      #
      # @option options [Boolean] :tempfile (false)
      #   Normally read data is stored in memory when building the parts in order to complete the underlying
@@ -237,14 +287,16 @@
      # @see Client#complete_multipart_upload
      # @see Client#upload_part
      def upload_stream(bucket:, key:, **options, &block)
-
+        upload_opts = options.merge(bucket: bucket, key: key)
+        executor = @executor || DefaultExecutor.new(max_threads: upload_opts.delete(:thread_count))
        uploader = MultipartStreamUploader.new(
          client: @client,
-
-          tempfile:
-          part_size:
+          executor: executor,
+          tempfile: upload_opts.delete(:tempfile),
+          part_size: upload_opts.delete(:part_size)
        )
-        uploader.upload(
+        uploader.upload(upload_opts, &block)
+        executor.shutdown unless @executor
        true
      end
    end
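Because the documented contract only requires `post` and `kill` (`shutdown` is optional, and the manager only shuts down executors it created itself), a custom executor can plausibly be as small as a synchronous stand-in, useful in tests or single-threaded environments. The class name here is illustrative, not part of the SDK:

    class InlineExecutor
      # Runs each task immediately on the calling thread.
      def post(*args, &block)
        block.call(*args)
        true
      end

      def kill
        true # nothing pooled, nothing to stop
      end
    end

    tm = Aws::S3::TransferManager.new(executor: InlineExecutor.new)
    tm.download_file('/tmp/file', bucket: 'my-bucket', key: 'key')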
data/lib/aws-sdk-s3.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: aws-sdk-s3
 version: !ruby/object:Gem::Version
-  version: 1.199.1
+  version: 1.200.0
 platform: ruby
 authors:
 - Amazon Web Services
@@ -95,6 +95,7 @@ files:
 - lib/aws-sdk-s3/customizations/object_version.rb
 - lib/aws-sdk-s3/customizations/types/list_object_versions_output.rb
 - lib/aws-sdk-s3/customizations/types/permanent_redirect.rb
+- lib/aws-sdk-s3/default_executor.rb
 - lib/aws-sdk-s3/encryption.rb
 - lib/aws-sdk-s3/encryption/client.rb
 - lib/aws-sdk-s3/encryption/decrypt_handler.rb