aws-sdk-s3 1.213.0 → 1.214.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 438515448f62837952b523ea5cb8f41592cb39e2afa557b30c33c521a307db0c
4
- data.tar.gz: fcb1cc7e9cc73429393692857809f2dc156bec4e6b93eba0ccf7824e4ede0d35
3
+ metadata.gz: 07e46262bf85414dd7871d9d15e79bb1d49a233e8afcc28d33303c83cbd93c89
4
+ data.tar.gz: 379e303c06523e12f6669689b540ba3338d4a4bb2f98705bdceef069bd51e445
5
5
  SHA512:
6
- metadata.gz: 7d2ae919bc1d2e451538e35c2e4a61ed087266cf4b3205eb64aba047a6a75039e2c431cd74844a71176eec93ec3b9fbe387effd734c3342c7aef76a4571c949d
7
- data.tar.gz: cf67bd5b39c6cd5264a86f71103324c6f19bca24b7ff355b1ab81a70a4d9f2d41c195043e96fcf56f6fcba563f7161b7e17ecd163fd53a941b096bfb3f8b5410
6
+ metadata.gz: 7748aaaaa2db0d367ea16ef7732a095c2ba45a143fc16f1dd0e4542fed575976227660e188ec05c88c4dfc539468a3b79b0ac52c149b05202026e6a4c8e2bb89
7
+ data.tar.gz: 49ec7554aca1a1f8a4acb23091265b594b8dfbd51a07bccab4ed8cdc49817adb645b5a6fa5132532356e4010079252aa2850b9012bc46956d7c12a555f255853
data/CHANGELOG.md CHANGED
@@ -1,6 +1,11 @@
1
1
  Unreleased Changes
2
2
  ------------------
3
3
 
4
+ 1.214.0 (2026-03-04)
5
+ ------------------
6
+
7
+ * Feature - Added `#upload_directory` and `#download_directory` to `Aws::S3::TransferManager` for bulk directory transfers.
8
+
4
9
  1.213.0 (2026-01-28)
5
10
  ------------------
6
11
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.213.0
1
+ 1.214.0
@@ -22553,7 +22553,7 @@ module Aws::S3
22553
22553
  tracer: tracer
22554
22554
  )
22555
22555
  context[:gem_name] = 'aws-sdk-s3'
22556
- context[:gem_version] = '1.213.0'
22556
+ context[:gem_version] = '1.214.0'
22557
22557
  Seahorse::Client::Request.new(handlers, context)
22558
22558
  end
22559
22559
 
@@ -7,15 +7,22 @@ module Aws
7
7
  autoload :Encryption, 'aws-sdk-s3/encryption'
8
8
  autoload :EncryptionV2, 'aws-sdk-s3/encryption_v2'
9
9
  autoload :EncryptionV3, 'aws-sdk-s3/encryption_v3'
10
- autoload :FilePart, 'aws-sdk-s3/file_part'
10
+ autoload :LegacySigner, 'aws-sdk-s3/legacy_signer'
11
+
12
+ # transfer manager + multipart upload/download utilities
11
13
  autoload :DefaultExecutor, 'aws-sdk-s3/default_executor'
14
+ autoload :FilePart, 'aws-sdk-s3/file_part'
12
15
  autoload :FileUploader, 'aws-sdk-s3/file_uploader'
13
16
  autoload :FileDownloader, 'aws-sdk-s3/file_downloader'
14
- autoload :LegacySigner, 'aws-sdk-s3/legacy_signer'
15
17
  autoload :MultipartDownloadError, 'aws-sdk-s3/multipart_download_error'
16
18
  autoload :MultipartFileUploader, 'aws-sdk-s3/multipart_file_uploader'
17
19
  autoload :MultipartStreamUploader, 'aws-sdk-s3/multipart_stream_uploader'
18
20
  autoload :MultipartUploadError, 'aws-sdk-s3/multipart_upload_error'
21
+ autoload :DirectoryProgress, 'aws-sdk-s3/directory_progress'
22
+ autoload :DirectoryDownloadError, 'aws-sdk-s3/directory_download_error'
23
+ autoload :DirectoryDownloader, 'aws-sdk-s3/directory_downloader'
24
+ autoload :DirectoryUploadError, 'aws-sdk-s3/directory_upload_error'
25
+ autoload :DirectoryUploader, 'aws-sdk-s3/directory_uploader'
19
26
  autoload :ObjectCopier, 'aws-sdk-s3/object_copier'
20
27
  autoload :ObjectMultipartCopier, 'aws-sdk-s3/object_multipart_copier'
21
28
  autoload :PresignedPost, 'aws-sdk-s3/presigned_post'
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aws
4
+ module S3
5
+ # Raised when DirectoryDownloader fails to download objects from S3 bucket
6
+ class DirectoryDownloadError < StandardError
7
+ def initialize(message, errors = [])
8
+ @errors = errors
9
+ super(message)
10
+ end
11
+
12
+ # @return [Array<StandardError>] The list of errors encountered when downloading objects
13
+ attr_reader :errors
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,230 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aws
4
+ module S3
5
+ # @api private
6
+ # This is a one-shot class that downloads objects from a bucket to a local directory.
7
+ # This works as follows:
8
+ # * ObjectProducer runs in a background thread, calling `list_objects_v2` and
9
+ # pushing entries into a SizedQueue (max: 100).
10
+ # * An internal executor pulls from that queue and posts work. Each task uses
11
+ # FileDownloader to download objects then signals completion via `completion_queue`.
12
+ #
13
+ # We track how many tasks we posted, then pop that many times from `completion_queue`
14
+ # to wait for everything to finish.
15
+ #
16
+ # Errors are collected in a mutex-protected array. On failure (unless ignore_failure is set),
17
+ # we call abort which closes the queue - the producer catches ClosedQueueError and exits cleanly.
18
+ class DirectoryDownloader
19
+ def initialize(options = {})
20
+ @client = options[:client] || Client.new
21
+ @executor = options[:executor] || DefaultExecutor.new
22
+ @logger = options[:logger]
23
+ @producer = nil
24
+ @mutex = Mutex.new
25
+ end
26
+
27
+ attr_reader :client, :executor
28
+
29
+ def abort
30
+ @producer&.close
31
+ end
32
+
33
+ def download(destination, bucket:, **options)
34
+ if File.exist?(destination)
35
+ raise ArgumentError, 'invalid destination, expected a directory' unless File.directory?(destination)
36
+ else
37
+ FileUtils.mkdir_p(destination)
38
+ end
39
+
40
+ download_opts = build_download_opts(destination, options)
41
+ @producer = ObjectProducer.new(build_producer_opts(destination, bucket, options))
42
+ downloader = FileDownloader.new(client: @client, executor: @executor)
43
+ downloads, errors = process_download_queue(downloader, download_opts)
44
+ build_result(downloads, errors)
45
+ end
46
+
47
+ private
48
+
49
+ def build_download_opts(destination, opts)
50
+ {
51
+ destination: destination,
52
+ ignore_failure: opts[:ignore_failure] || false
53
+ }
54
+ end
55
+
56
+ def build_producer_opts(destination, bucket, opts)
57
+ {
58
+ client: @client,
59
+ directory_downloader: self,
60
+ destination: destination,
61
+ bucket: bucket,
62
+ s3_prefix: opts[:s3_prefix],
63
+ filter_callback: opts[:filter_callback],
64
+ request_callback: opts[:request_callback]
65
+ }
66
+ end
67
+
68
+ def build_result(download_count, errors)
69
+ if @producer&.closed?
70
+ msg = "directory download failed: #{errors.map(&:message).join('; ')}"
71
+ raise DirectoryDownloadError.new(msg, errors)
72
+ else
73
+ {
74
+ completed_downloads: [download_count - errors.count, 0].max,
75
+ failed_downloads: errors.count,
76
+ errors: errors.any? ? errors : nil
77
+ }.compact
78
+ end
79
+ end
80
+
81
+ def download_object(entry, downloader, errors, opts)
82
+ raise entry.error if entry.error
83
+
84
+ FileUtils.mkdir_p(File.dirname(entry.path)) unless Dir.exist?(File.dirname(entry.path))
85
+ downloader.download(entry.path, entry.params)
86
+ @logger&.debug("Downloaded #{entry.params[:key]} from #{entry.params[:bucket]} to #{entry.path}")
87
+ rescue StandardError => e
88
+ @logger&.warn("Failed to download #{entry.params[:key]} from #{entry.params[:bucket]}: #{e.message}")
89
+ @mutex.synchronize { errors << e }
90
+ abort unless opts[:ignore_failure]
91
+ end
92
+
93
+ def process_download_queue(downloader, opts)
94
+ queue_executor = DefaultExecutor.new(max_threads: 2)
95
+ completion_queue = Queue.new
96
+ posted_count = 0
97
+ errors = []
98
+ begin
99
+ @producer.each do |object|
100
+ queue_executor.post(object) do |o|
101
+ download_object(o, downloader, errors, opts)
102
+ ensure
103
+ completion_queue << :done
104
+ end
105
+ posted_count += 1
106
+ end
107
+ rescue ClosedQueueError
108
+ # abort already requested
109
+ rescue StandardError => e
110
+ @mutex.synchronize { errors << e }
111
+ abort
112
+ end
113
+ posted_count.times { completion_queue.pop }
114
+ [posted_count, errors]
115
+ ensure
116
+ queue_executor&.shutdown
117
+ end
118
+
119
+ # @api private
120
+ class ObjectProducer
121
+ include Enumerable
122
+
123
+ DEFAULT_QUEUE_SIZE = 100
124
+ DONE_MARKER = :done
125
+
126
+ def initialize(opts = {})
127
+ @directory_downloader = opts[:directory_downloader]
128
+ @destination_dir = opts[:destination]
129
+ @bucket = opts[:bucket]
130
+ @client = opts[:client]
131
+ @s3_prefix = opts[:s3_prefix]
132
+ @filter_callback = opts[:filter_callback]
133
+ @request_callback = opts[:request_callback]
134
+ @object_queue = SizedQueue.new(DEFAULT_QUEUE_SIZE)
135
+ end
136
+
137
+ def closed?
138
+ @object_queue.closed?
139
+ end
140
+
141
+ def close
142
+ @object_queue.close
143
+ @object_queue.clear
144
+ end
145
+
146
+ def each
147
+ producer_thread = Thread.new do
148
+ stream_objects
149
+ @object_queue << DONE_MARKER
150
+ rescue ClosedQueueError
151
+ # abort requested
152
+ rescue StandardError => e
153
+ close
154
+ raise e
155
+ end
156
+
157
+ while (object = @object_queue.shift) && object != DONE_MARKER
158
+ yield object
159
+ end
160
+ ensure
161
+ producer_thread.value
162
+ end
163
+
164
+ private
165
+
166
+ def apply_request_callback(key, params)
167
+ callback_params = @request_callback.call(key, params.dup)
168
+ return params unless callback_params.is_a?(Hash) && callback_params.any?
169
+
170
+ params.merge(callback_params)
171
+ end
172
+
173
+ def build_object_entry(key)
174
+ params = { bucket: @bucket, key: key }
175
+ params = apply_request_callback(key, params) if @request_callback
176
+ error = validate_key(key)
177
+ return DownloadEntry.new(path: '', params: params, error: error) if error
178
+
179
+ full_path = normalize_path(File.join(@destination_dir, key))
180
+ DownloadEntry.new(path: full_path, params: params, error: error)
181
+ end
182
+
183
+ def include_object?(obj)
184
+ return true unless @filter_callback
185
+
186
+ @filter_callback.call(obj)
187
+ end
188
+
189
+ def directory_marker?(obj)
190
+ obj.key.end_with?('/') && obj.size.zero?
191
+ end
192
+
193
+ def normalize_path(path)
194
+ return path if File::SEPARATOR == '/'
195
+
196
+ path.tr('/', File::SEPARATOR)
197
+ end
198
+
199
+ def stream_objects(continuation_token: nil)
200
+ resp = @client.list_objects_v2(bucket: @bucket, prefix: @s3_prefix, continuation_token: continuation_token)
201
+ resp.contents&.each do |o|
202
+ next if directory_marker?(o)
203
+ next unless include_object?(o)
204
+
205
+ @object_queue << build_object_entry(o.key)
206
+ end
207
+ stream_objects(continuation_token: resp.next_continuation_token) if resp.next_continuation_token
208
+ end
209
+
210
+ def validate_key(key)
211
+ segments = key.split('/')
212
+ return unless segments.any? { |s| %w[. ..].include?(s) }
213
+
214
+ DirectoryDownloadError.new("invalid key '#{key}': contains '.' or '..' path segments")
215
+ end
216
+
217
+ # @api private
218
+ class DownloadEntry
219
+ def initialize(opts = {})
220
+ @path = opts[:path]
221
+ @params = opts[:params]
222
+ @error = opts[:error]
223
+ end
224
+
225
+ attr_reader :path, :params, :error
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Aws
4
+ module S3
5
+ # Raised when DirectoryUploader fails to upload files to S3 bucket
6
+ class DirectoryUploadError < StandardError
7
+ def initialize(message, errors = [])
8
+ @errors = errors
9
+ super(message)
10
+ end
11
+
12
+ # @return [Array<StandardError>] The list of errors encountered when uploading files
13
+ attr_reader :errors
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,270 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+
5
+ module Aws
6
+ module S3
7
+ # @api private
8
+ # This is a one-shot class that uploads files from a local directory to a bucket.
9
+ # This works as follows:
10
+ # * FileProducer runs in a background thread, scanning the directory and
11
+ # pushing entries into a SizedQueue (max: 100).
12
+ # * An internal executor pulls from that queue and posts work. Each task uses
13
+ # FileUploader to upload files then signals completion via `completion_queue`.
14
+ #
15
+ # We track how many tasks we posted, then pop that many times from `completion_queue`
16
+ # to wait for everything to finish.
17
+ #
18
+ # Errors are collected in a mutex-protected array. On failure (unless ignore_failure is set),
19
+ # we call abort which closes the queue - the producer catches ClosedQueueError and exits cleanly.
20
+ class DirectoryUploader
21
+ def initialize(options = {})
22
+ @client = options[:client] || Client.new
23
+ @executor = options[:executor] || DefaultExecutor.new
24
+ @logger = options[:logger]
25
+ @producer = nil
26
+ @mutex = Mutex.new
27
+ end
28
+
29
+ attr_reader :client, :executor
30
+
31
+ def abort
32
+ @producer&.close
33
+ end
34
+
35
+ def upload(source_directory, bucket, **opts)
36
+ raise ArgumentError, 'Invalid directory' unless Dir.exist?(source_directory)
37
+
38
+ uploader = FileUploader.new(
39
+ multipart_threshold: opts.delete(:multipart_threshold),
40
+ http_chunk_size: opts.delete(:http_chunk_size),
41
+ client: @client,
42
+ executor: @executor
43
+ )
44
+ upload_opts = build_upload_opts(opts)
45
+ @producer = FileProducer.new(build_producer_opts(source_directory, bucket, opts))
46
+ uploads, errors = process_upload_queue(uploader, upload_opts)
47
+ build_result(uploads, errors)
48
+ end
49
+
50
+ private
51
+
52
+ def build_upload_opts(opts)
53
+ { ignore_failure: opts[:ignore_failure] || false }
54
+ end
55
+
56
+ def build_producer_opts(source_directory, bucket, opts)
57
+ {
58
+ directory_uploader: self,
59
+ source_dir: source_directory,
60
+ bucket: bucket,
61
+ s3_prefix: opts[:s3_prefix],
62
+ recursive: opts[:recursive] || false,
63
+ follow_symlinks: opts[:follow_symlinks] || false,
64
+ filter_callback: opts[:filter_callback],
65
+ request_callback: opts[:request_callback]
66
+ }
67
+ end
68
+
69
+ def build_result(upload_count, errors)
70
+ if @producer&.closed?
71
+ msg = "directory upload failed: #{errors.map(&:message).join('; ')}"
72
+ raise DirectoryUploadError.new(msg, errors)
73
+ else
74
+ {
75
+ completed_uploads: [upload_count - errors.count, 0].max,
76
+ failed_uploads: errors.count,
77
+ errors: errors.any? ? errors : nil
78
+ }.compact
79
+ end
80
+ end
81
+
82
+ def process_upload_queue(uploader, opts)
83
+ queue_executor = DefaultExecutor.new(max_threads: 2)
84
+ completion_queue = Queue.new
85
+ posted_count = 0
86
+ errors = []
87
+ begin
88
+ @producer.each do |file|
89
+ queue_executor.post(file) do |f|
90
+ upload_file(f, uploader, errors, opts)
91
+ ensure
92
+ completion_queue << :done
93
+ end
94
+ posted_count += 1
95
+ end
96
+ rescue ClosedQueueError
97
+ # abort already requested
98
+ rescue StandardError => e
99
+ @mutex.synchronize { errors << e }
100
+ abort
101
+ end
102
+ posted_count.times { completion_queue.pop }
103
+ [posted_count, errors]
104
+ ensure
105
+ queue_executor&.shutdown
106
+ end
107
+
108
+ def upload_file(entry, uploader, errors, opts)
109
+ uploader.upload(entry.path, entry.params)
110
+ @logger&.debug("Uploaded #{entry.path} to #{entry.params[:bucket]} as #{entry.params[:key]}")
111
+ rescue StandardError => e
112
+ @logger&.warn("Failed to upload #{entry.path} to #{entry.params[:bucket]}: #{e.message}")
113
+ @mutex.synchronize { errors << e }
114
+ abort unless opts[:ignore_failure]
115
+ end
116
+
117
+ # @api private
118
+ class FileProducer
119
+ include Enumerable
120
+
121
+ DEFAULT_QUEUE_SIZE = 100
122
+ DONE_MARKER = :done
123
+
124
+ def initialize(opts = {})
125
+ @directory_uploader = opts[:directory_uploader]
126
+ @source_dir = opts[:source_dir]
127
+ @bucket = opts[:bucket]
128
+ @s3_prefix = opts[:s3_prefix]
129
+ @recursive = opts[:recursive]
130
+ @follow_symlinks = opts[:follow_symlinks]
131
+ @filter_callback = opts[:filter_callback]
132
+ @request_callback = opts[:request_callback]
133
+ @file_queue = SizedQueue.new(DEFAULT_QUEUE_SIZE)
134
+ end
135
+
136
+ def closed?
137
+ @file_queue.closed?
138
+ end
139
+
140
+ def close
141
+ @file_queue.close
142
+ @file_queue.clear
143
+ end
144
+
145
+ def each
146
+ producer_thread = Thread.new do
147
+ if @recursive
148
+ find_recursively
149
+ else
150
+ find_directly
151
+ end
152
+ @file_queue << DONE_MARKER
153
+ rescue ClosedQueueError
154
+ # abort requested
155
+ rescue StandardError => e
156
+ # encountered a traversal error, we must abort immediately
157
+ close
158
+ raise DirectoryUploadError, "Directory traversal failed for '#{@source_dir}': #{e.message}"
159
+ end
160
+
161
+ while (file = @file_queue.shift) && file != DONE_MARKER
162
+ yield file
163
+ end
164
+ ensure
165
+ producer_thread.value
166
+ end
167
+
168
+ private
169
+
170
+ def apply_request_callback(file_path, params)
171
+ callback_params = @request_callback.call(file_path, params.dup)
172
+ return params unless callback_params.is_a?(Hash) && callback_params.any?
173
+
174
+ params.merge(callback_params)
175
+ end
176
+
177
+ def build_upload_entry(file_path, key)
178
+ params = { bucket: @bucket, key: @s3_prefix ? File.join(@s3_prefix, key) : key }
179
+ params = apply_request_callback(file_path, params) if @request_callback
180
+ UploadEntry.new(path: file_path, params: params)
181
+ end
182
+
183
+ def find_directly
184
+ Dir.each_child(@source_dir) do |entry|
185
+ entry_path = File.join(@source_dir, entry)
186
+ stat = nil
187
+
188
+ if @follow_symlinks
189
+ stat = File.stat(entry_path)
190
+ next if stat.directory?
191
+ else
192
+ stat = File.lstat(entry_path)
193
+ next if stat.symlink? || stat.directory?
194
+ end
195
+
196
+ next unless stat.file?
197
+ next unless include_file?(entry_path, entry)
198
+
199
+ @file_queue << build_upload_entry(entry_path, entry)
200
+ end
201
+ end
202
+
203
+ def find_recursively
204
+ if @follow_symlinks
205
+ ancestors = Set.new
206
+ ancestors << File.stat(@source_dir).ino
207
+ scan_directory(@source_dir, ancestors: ancestors)
208
+ else
209
+ scan_directory(@source_dir)
210
+ end
211
+ end
212
+
213
+ def include_file?(file_path, file_name)
214
+ return true unless @filter_callback
215
+
216
+ @filter_callback.call(file_path, file_name)
217
+ end
218
+
219
+ def scan_directory(dir_path, key_prefix: '', ancestors: nil)
220
+ Dir.each_child(dir_path) do |entry|
221
+ full_path = File.join(dir_path, entry)
222
+ next unless include_file?(full_path, entry)
223
+
224
+ stat = get_file_stat(full_path)
225
+ next unless stat
226
+
227
+ if stat.directory?
228
+ handle_directory(full_path, entry, key_prefix, ancestors)
229
+ elsif stat.file? # skip non-file types
230
+ key = key_prefix.empty? ? entry : File.join(key_prefix, entry)
231
+ @file_queue << build_upload_entry(full_path, key)
232
+ end
233
+ end
234
+ end
235
+
236
+ def get_file_stat(full_path)
237
+ return File.stat(full_path) if @follow_symlinks
238
+
239
+ lstat = File.lstat(full_path)
240
+ return if lstat.symlink?
241
+
242
+ lstat
243
+ end
244
+
245
+ def handle_directory(dir_path, dir_name, key_prefix, ancestors)
246
+ ino = nil
247
+ if @follow_symlinks && ancestors
248
+ ino = File.stat(dir_path).ino
249
+ return if ancestors.include?(ino) # cycle detected - skip
250
+
251
+ ancestors.add(ino)
252
+ end
253
+ new_prefix = key_prefix.empty? ? dir_name : File.join(key_prefix, dir_name)
254
+ scan_directory(dir_path, key_prefix: new_prefix, ancestors: ancestors)
255
+ ancestors.delete(ino) if @follow_symlinks && ancestors
256
+ end
257
+
258
+ # @api private
259
+ class UploadEntry
260
+ def initialize(opts = {})
261
+ @path = opts[:path]
262
+ @params = opts[:params]
263
+ end
264
+
265
+ attr_reader :path, :params
266
+ end
267
+ end
268
+ end
269
+ end
270
+ end
@@ -7,7 +7,9 @@ module Aws
7
7
  #
8
8
  # * upload a file with multipart upload
9
9
  # * upload a stream with multipart upload
10
- # * download a S3 object with multipart download
10
+ # * upload all files in a directory to an S3 bucket recursively or non-recursively
11
+ # * download an S3 object with multipart download
12
+ # * download all objects in an S3 bucket with same prefix to a local directory
11
13
  # * track transfer progress by using progress listener
12
14
  #
13
15
  # ## Executor Management
@@ -49,19 +51,21 @@ module Aws
49
51
  # executor.shutdown # You must shutdown custom executors
50
52
  #
51
53
  class TransferManager
52
-
53
54
  # @param [Hash] options
54
55
  # @option options [S3::Client] :client (S3::Client.new)
55
56
  # The S3 client to use for {TransferManager} operations. If not provided, a new default client
56
57
  # will be created automatically.
57
- # @option options [Object] :executor
58
+ # @option options [Object] :executor (nil)
58
59
  # The executor to use for multipart operations. Must implement the same interface as {DefaultExecutor}.
59
60
  # If not provided, a new {DefaultExecutor} will be created automatically for each operation and
60
61
  # shutdown after completion. When provided a custom executor, it will be reused across operations, and
61
62
  # you are responsible for shutting it down when finished.
63
+ # @option options [Logger] :logger (nil)
64
+ # The Logger instance for logging transfer operations. If not set, logging is disabled.
62
65
  def initialize(options = {})
63
66
  @client = options[:client] || Client.new
64
67
  @executor = options[:executor]
68
+ @logger = options[:logger]
65
69
  end
66
70
 
67
71
  # @return [S3::Client]
@@ -70,6 +74,98 @@ module Aws
70
74
  # @return [Object]
71
75
  attr_reader :executor
72
76
 
77
+ # @return [Logger]
78
+ attr_reader :logger
79
+
80
+ # Downloads objects in an S3 bucket to a local directory.
81
+ #
82
+ # The downloaded directory structure will match the provided S3 virtual bucket. For example,
83
+ # assume that you have the following keys in your bucket:
84
+ #
85
+ # * sample.jpg
86
+ # * photos/2022/January/sample.jpg
87
+ # * photos/2022/February/sample1.jpg
88
+ # * photos/2022/February/sample2.jpg
89
+ # * photos/2022/February/sample3.jpg
90
+ #
91
+ # Given a request to download bucket to a destination with path of `/test`, the downloaded
92
+ # directory would look like this:
93
+ #
94
+ # ```
95
+ # |- test
96
+ # |- sample.jpg
97
+ # |- photos
98
+ # |- 2022
99
+ # |- January
100
+ # |- sample.jpg
101
+ # |- February
102
+ # |- sample1.jpg
103
+ # |- sample2.jpg
104
+ # |- sample3.jpg
105
+ # ```
106
+ #
107
+ # Directory markers (zero-byte objects ending with `/`) are skipped during download.
108
+ # Existing files with same name as downloaded objects will be overwritten.
109
+ #
110
+ # Object keys containing path traversal sequences (`..` or `.`) will raise an error.
111
+ #
112
+ # @example Downloading buckets to a local directory
113
+ # tm = TransferManager.new
114
+ # tm.download_directory('/local/path', bucket: 'my-bucket')
115
+ # # => {completed_downloads: 7, failed_downloads: 0, errors: 0}
116
+ #
117
+ # @param [String] destination
118
+ # The local directory path to download objects to. Created if it doesn't exist.
119
+ # If files with the same names already exist in the destination, they will be overwritten.
120
+ #
121
+ # @param [String] bucket
122
+ # The name of the bucket to download from.
123
+ #
124
+ # @param [Hash] options
125
+ #
126
+ # @option options [String] :s3_prefix (nil)
127
+ # Limit the download to objects that begin with the specified prefix. The prefix is
128
+ # passed directly to the S3 ListObjectsV2 API for filtering. To match only objects
129
+ # within a specific "folder", include a trailing `/` (e.g., `"photos/"` instead of
130
+ # `"photos"`). The full object key is preserved in the local file path.
131
+ #
132
+ # @option options [Boolean] :ignore_failure (false)
133
+ # How to handle individual file download failures:
134
+ # * `false` (default) - Cancel all ongoing requests, terminate the ongoing downloads and raise an exception
135
+ # * `true` - Continue downloading remaining objects, report failures in result.
136
+ #
137
+ # @option options [Proc] :filter_callback (nil)
138
+ # A Proc to filter which objects to download. Called with `(object)` for each object.
139
+ # Return `true` to download the object, `false` to skip it.
140
+ #
141
+ # @option options [Proc] :request_callback (nil)
142
+ # A Proc to modify download parameters for each object. Called with `(key, params)`.
143
+ # Must return the modified parameters.
144
+ #
145
+ # @option options [Integer] :thread_count (10)
146
+ # The number of threads to use for multipart downloads of individual large files.
147
+ # Only used when no custom executor is provided to the {TransferManager}.
148
+ #
149
+ # @note On case-insensitive filesystems (e.g., Windows, macOS default), S3 object keys that
150
+ # differ only by case (e.g., "File.txt" and "file.txt") may overwrite each other when
151
+ # downloaded. This condition is not automatically detected. Use the `:filter_callback`
152
+ # option to handle such conflicts if needed.
153
+ #
154
+ # @raise [DirectoryDownloadError] Raised when download fails with `ignore_failure: false` (default)
155
+ #
156
+ # @return [Hash] Returns a hash with download statistics:
157
+ #
158
+ # * `:completed_downloads` - Number of objects successfully downloaded
159
+ # * `:failed_downloads` - Number of objects that failed to download
160
+ # * `:errors` - Array of errors for failed downloads (only present when failures occur)
161
+ def download_directory(destination, bucket:, **options)
162
+ executor = @executor || DefaultExecutor.new(max_threads: options.delete(:thread_count))
163
+ downloader = DirectoryDownloader.new(client: @client, executor: executor, logger: @logger)
164
+ result = downloader.download(destination, bucket: bucket, **options)
165
+ executor.shutdown unless @executor
166
+ result
167
+ end
168
+
73
169
  # Downloads a file in S3 to a path on disk.
74
170
  #
75
171
  # # small files (< 5MB) are downloaded in a single API call
@@ -120,8 +216,9 @@ module Aws
120
216
  #
121
217
  # @option options [Integer] :chunk_size required in `"get_range"` mode.
122
218
  #
123
- # @option options [Integer] :thread_count (10) Customize threads used in the multipart download.
124
- # Only used when no custom executor is provided (creates {DefaultExecutor} with given thread count).
219
+ # @option options [Integer] :thread_count (10)
220
+ # The number of threads to use for multipart downloads.
221
+ # Only used when no custom executor is provided to the {TransferManager}.
125
222
  #
126
223
  # @option options [String] :checksum_mode ("ENABLED")
127
224
  # This option is deprecated. Use `:response_checksum_validation` on your S3 client instead.
@@ -153,6 +250,120 @@ module Aws
153
250
  true
154
251
  end
155
252
 
253
+ # Uploads all files under the given directory to the provided S3 bucket.
254
+ # The key name transformation depends on the optional prefix.
255
+ #
256
+ # By default, all subdirectories will be uploaded non-recursively and symbolic links are not
257
+ # followed automatically. Assume you have a local directory `/test` with the following structure:
258
+ #
259
+ # ```
260
+ # |- test
261
+ # |- sample.jpg
262
+ # |- photos
263
+ # |- 2022
264
+ # |- January
265
+ # |- sample.jpg
266
+ # |- February
267
+ # |- sample1.jpg
268
+ # |- sample2.jpg
269
+ # |- sample3.jpg
270
+ # ```
271
+ #
272
+ # Given a request to upload directory `/test` to an S3 bucket with default settings, the target bucket will have the
273
+ # following S3 objects:
274
+ #
275
+ # * sample.jpg
276
+ #
277
+ # If `:recursive` is set to `true`, the target bucket will have the following S3 objects:
278
+ #
279
+ # * sample.jpg
280
+ # * photos/2022/January/sample.jpg
281
+ # * photos/2022/February/sample1.jpg
282
+ # * photos/2022/February/sample2.jpg
283
+ # * photos/2022/February/sample3.jpg
284
+ #
285
+ # Only regular files are uploaded; special files (sockets, pipes, devices) are skipped.
286
+ # Symlink cycles are detected and skipped when following symlinks.
287
+ # Empty directories are not represented in S3. Existing S3 objects with the same key are
288
+ # overwritten.
289
+ #
290
+ # @example Uploading a directory
291
+ # tm = TransferManager.new
292
+ # tm.upload_directory('/path/to/directory', bucket: 'bucket')
293
+ # # => {completed_uploads: 7, failed_uploads: 0}
294
+ #
295
+ # @example Using filter callback to upload only text files
296
+ # tm = TransferManager.new
297
+ # filter = proc do |file_path, file_name|
298
+ # File.extname(file_name) == '.txt' # Only upload .txt files
299
+ # end
300
+ # tm.upload_directory('/path/to/directory', bucket: 'bucket', filter_callback: filter)
301
+ #
302
+ # @param [String, Pathname, File, Tempfile] source
303
+ # The source directory to upload.
304
+ #
305
+ # @param [String] bucket
306
+ # The name of the bucket to upload objects to.
307
+ #
308
+ # @param [Hash] options
309
+ #
310
+ # @option options [String] :s3_prefix (nil)
311
+ # The S3 key prefix to use for each object. If not provided, files will be uploaded to the root of the bucket.
312
+ #
313
+ # @option options [Boolean] :recursive (false)
314
+ # Whether to upload directories recursively:
315
+ #
316
+ # * `false` (default) - only files in the top-level directory are uploaded, subdirectories are ignored.
317
+ # * `true` - all files and subdirectories are uploaded recursively.
318
+ #
319
+ # @option options [Boolean] :follow_symlinks (false)
320
+ # Whether to follow symbolic links when traversing the file tree:
321
+ #
322
+ # * `false` (default) - symbolic links are ignored and not uploaded.
323
+ # * `true` - symbolic links are followed and their target files/directories are uploaded. Symlink cycles
324
+ # are detected and skipped.
325
+ #
326
+ # @option options [Boolean] :ignore_failure (false)
327
+ # How to handle individual file upload failures:
328
+ #
329
+ # * `false` (default) - Cancel all ongoing requests, terminate the directory upload, and raise an exception
330
+ # * `true` - Ignore the failure and continue the transfer for other files
331
+ #
332
+ # @option options [Proc] :filter_callback (nil)
333
+ # A Proc to filter which files to upload. Called with `(file_path, file_name)` for each file.
334
+ # Return `true` to upload the file, `false` to skip it.
335
+ #
336
+ # @option options [Proc] :request_callback (nil)
337
+ # A Proc to modify upload parameters for each file. Called with `(file_path, params)`.
338
+ # Must return the modified parameters.
339
+ #
340
+ # @option options [Integer] :http_chunk_size (16384) Size in bytes for each chunk when streaming request bodies
341
+ # over HTTP. Controls the buffer size used when sending data to S3. Larger values may improve throughput by
342
+ # reducing the number of network writes, but use more memory. Custom values must be at least 16KB.
343
+ # Only Ruby MRI is supported.
344
+ #
345
+ # @option options [Integer] :thread_count (10)
346
+ # The number of threads to use for multipart uploads of individual large files.
347
+ # Only used when no custom executor is provided to the {TransferManager}.
348
+ #
349
+ # @raise [DirectoryUploadError] Raised when:
350
+ #
351
+ # * Upload failure with `ignore_failure: false` (default)
352
+ # * Directory traversal failure (permission denied, broken symlink, etc.)
353
+ #
354
+ # @return [Hash] Returns a hash with upload statistics:
355
+ #
356
+ # * `:completed_uploads` - Number of files successfully uploaded
357
+ # * `:failed_uploads` - Number of files that failed to upload
358
+ # * `:errors` - Array of error objects for failed uploads (only present when failures occur)
359
+ def upload_directory(source, bucket:, **options)
360
+ executor = @executor || DefaultExecutor.new(max_threads: options.delete(:thread_count))
361
+ uploader = DirectoryUploader.new(client: @client, executor: executor, logger: @logger)
362
+ result = uploader.upload(source, bucket, **options.merge(http_chunk_size: resolve_http_chunk_size(options)))
363
+ executor.shutdown unless @executor
364
+ result
365
+ end
366
+
156
367
  # Uploads a file from disk to S3.
157
368
  #
158
369
  # # a small file are uploaded with PutObject API
@@ -202,8 +413,9 @@ module Aws
202
413
  # Files larger than or equal to `:multipart_threshold` are uploaded using the S3 multipart upload APIs.
203
414
  # Default threshold is `100MB`.
204
415
  #
205
- # @option options [Integer] :thread_count (10) Customize threads used in the multipart upload.
206
- # Only used when no custom executor is provided (creates {DefaultExecutor} with the given thread count).
416
+ # @option options [Integer] :thread_count (10)
417
+ # The number of threads to use for multipart uploads.
418
+ # Only used when no custom executor is provided to the {TransferManager}.
207
419
  #
208
420
  # @option option [Integer] :http_chunk_size (16384) Size in bytes for each chunk when streaming request bodies
209
421
  # over HTTP. Controls the buffer size used when sending data to S3. Larger values may improve throughput by
@@ -226,17 +438,7 @@ module Aws
226
438
  # @see Client#upload_part
227
439
  def upload_file(source, bucket:, key:, **options)
228
440
  upload_opts = options.merge(bucket: bucket, key: key)
229
- http_chunk_size =
230
- if defined?(JRUBY_VERSION)
231
- nil
232
- else
233
- chunk = upload_opts.delete(:http_chunk_size)
234
- if chunk && chunk < Aws::Plugins::ChecksumAlgorithm::DEFAULT_TRAILER_CHUNK_SIZE
235
- raise ArgumentError, ':http_chunk_size must be at least 16384 bytes (16KB)'
236
- end
237
-
238
- chunk
239
- end
441
+ http_chunk_size = resolve_http_chunk_size(upload_opts)
240
442
 
241
443
  executor = @executor || DefaultExecutor.new(max_threads: upload_opts.delete(:thread_count))
242
444
  uploader = FileUploader.new(
@@ -316,6 +518,19 @@ module Aws
316
518
  executor.shutdown unless @executor
317
519
  true
318
520
  end
521
+
522
+ private
523
+
524
+ def resolve_http_chunk_size(opts)
525
+ return if defined?(JRUBY_VERSION)
526
+
527
+ chunk = opts.delete(:http_chunk_size)
528
+ if chunk && chunk < Aws::Plugins::ChecksumAlgorithm::DEFAULT_TRAILER_CHUNK_SIZE
529
+ raise ArgumentError, ':http_chunk_size must be at least 16384 bytes (16KB)'
530
+ end
531
+
532
+ chunk
533
+ end
319
534
  end
320
535
  end
321
536
  end
data/lib/aws-sdk-s3.rb CHANGED
@@ -75,7 +75,7 @@ module Aws::S3
75
75
  autoload :ObjectVersion, 'aws-sdk-s3/object_version'
76
76
  autoload :EventStreams, 'aws-sdk-s3/event_streams'
77
77
 
78
- GEM_VERSION = '1.213.0'
78
+ GEM_VERSION = '1.214.0'
79
79
 
80
80
  end
81
81
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aws-sdk-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.213.0
4
+ version: 1.214.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Amazon Web Services
@@ -96,6 +96,10 @@ files:
96
96
  - lib/aws-sdk-s3/customizations/types/list_object_versions_output.rb
97
97
  - lib/aws-sdk-s3/customizations/types/permanent_redirect.rb
98
98
  - lib/aws-sdk-s3/default_executor.rb
99
+ - lib/aws-sdk-s3/directory_download_error.rb
100
+ - lib/aws-sdk-s3/directory_downloader.rb
101
+ - lib/aws-sdk-s3/directory_upload_error.rb
102
+ - lib/aws-sdk-s3/directory_uploader.rb
99
103
  - lib/aws-sdk-s3/encryption.rb
100
104
  - lib/aws-sdk-s3/encryption/client.rb
101
105
  - lib/aws-sdk-s3/encryption/decrypt_handler.rb