RubyGems - aws-sdk-s3 - Versions diffs - 1.196.1 → 1.213.0 - Mend

aws-sdk-s3 1.196.1 → 1.213.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +116 -0
data/VERSION +1 -1
data/lib/aws-sdk-s3/bucket.rb +17 -17
data/lib/aws-sdk-s3/bucket_acl.rb +1 -1
data/lib/aws-sdk-s3/bucket_versioning.rb +33 -0
data/lib/aws-sdk-s3/client.rb +1271 -453
data/lib/aws-sdk-s3/client_api.rb +115 -0
data/lib/aws-sdk-s3/customizations/object.rb +39 -24
data/lib/aws-sdk-s3/customizations.rb +3 -1
data/lib/aws-sdk-s3/default_executor.rb +103 -0
data/lib/aws-sdk-s3/encryption/client.rb +2 -2
data/lib/aws-sdk-s3/encryption/default_cipher_provider.rb +2 -0
data/lib/aws-sdk-s3/encryption/encrypt_handler.rb +2 -0
data/lib/aws-sdk-s3/encryption/kms_cipher_provider.rb +2 -0
data/lib/aws-sdk-s3/encryptionV2/client.rb +98 -23
data/lib/aws-sdk-s3/encryptionV2/decrypt_handler.rb +7 -162
data/lib/aws-sdk-s3/encryptionV2/decryption.rb +205 -0
data/lib/aws-sdk-s3/encryptionV2/default_cipher_provider.rb +17 -0
data/lib/aws-sdk-s3/encryptionV2/encrypt_handler.rb +2 -0
data/lib/aws-sdk-s3/encryptionV2/io_encrypter.rb +2 -0
data/lib/aws-sdk-s3/encryptionV2/kms_cipher_provider.rb +8 -0
data/lib/aws-sdk-s3/encryptionV2/utils.rb +5 -0
data/lib/aws-sdk-s3/encryptionV3/client.rb +885 -0
data/lib/aws-sdk-s3/encryptionV3/decrypt_handler.rb +98 -0
data/lib/aws-sdk-s3/encryptionV3/decryption.rb +244 -0
data/lib/aws-sdk-s3/encryptionV3/default_cipher_provider.rb +159 -0
data/lib/aws-sdk-s3/encryptionV3/default_key_provider.rb +35 -0
data/lib/aws-sdk-s3/encryptionV3/encrypt_handler.rb +98 -0
data/lib/aws-sdk-s3/encryptionV3/errors.rb +47 -0
data/lib/aws-sdk-s3/encryptionV3/io_auth_decrypter.rb +60 -0
data/lib/aws-sdk-s3/encryptionV3/io_decrypter.rb +35 -0
data/lib/aws-sdk-s3/encryptionV3/io_encrypter.rb +84 -0
data/lib/aws-sdk-s3/encryptionV3/key_provider.rb +28 -0
data/lib/aws-sdk-s3/encryptionV3/kms_cipher_provider.rb +159 -0
data/lib/aws-sdk-s3/encryptionV3/materials.rb +58 -0
data/lib/aws-sdk-s3/encryptionV3/utils.rb +321 -0
data/lib/aws-sdk-s3/encryption_v2.rb +1 -0
data/lib/aws-sdk-s3/encryption_v3.rb +24 -0
data/lib/aws-sdk-s3/endpoint_parameters.rb +17 -17
data/lib/aws-sdk-s3/endpoint_provider.rb +241 -68
data/lib/aws-sdk-s3/endpoints.rb +39 -0
data/lib/aws-sdk-s3/errors.rb +11 -0
data/lib/aws-sdk-s3/file_downloader.rb +192 -104
data/lib/aws-sdk-s3/file_uploader.rb +17 -13
data/lib/aws-sdk-s3/multipart_file_uploader.rb +82 -69
data/lib/aws-sdk-s3/multipart_stream_uploader.rb +96 -107
data/lib/aws-sdk-s3/multipart_upload.rb +12 -12
data/lib/aws-sdk-s3/multipart_upload_part.rb +8 -8
data/lib/aws-sdk-s3/object.rb +88 -59
data/lib/aws-sdk-s3/object_acl.rb +5 -5
data/lib/aws-sdk-s3/object_summary.rb +70 -41
data/lib/aws-sdk-s3/object_version.rb +23 -25
data/lib/aws-sdk-s3/plugins/checksum_algorithm.rb +18 -5
data/lib/aws-sdk-s3/plugins/endpoints.rb +1 -1
data/lib/aws-sdk-s3/plugins/http_200_errors.rb +58 -34
data/lib/aws-sdk-s3/transfer_manager.rb +321 -0
data/lib/aws-sdk-s3/types.rb +687 -330
data/lib/aws-sdk-s3.rb +1 -1
data/sig/bucket.rbs +1 -1
data/sig/client.rbs +62 -12
data/sig/errors.rbs +2 -0
data/sig/multipart_upload.rbs +1 -1
data/sig/object.rbs +7 -5
data/sig/object_summary.rbs +7 -5
data/sig/types.rbs +84 -14
metadata +21 -3

data/lib/aws-sdk-s3/plugins/http_200_errors.rb CHANGED Viewed

@@ -3,15 +3,28 @@
 module Aws
   module S3
     module Plugins
       # A handful of Amazon S3 operations will respond with a 200 status
       # code but will send an error in the response body. This plugin
       # injects a handler that will parse 200 response bodies for potential
       # errors, allowing them to be retried.
       # @api private
       class Http200Errors < Seahorse::Client::Plugin
         class Handler < Seahorse::Client::Handler
+          # A regular expression to match error codes in the response body
+          CODE_PATTERN = %r{<Code>(.+?)</Code>}.freeze
+          private_constant :CODE_PATTERN
+          # A list of encodings we force into UTF-8
+          ENCODINGS_TO_FIX = [Encoding::US_ASCII, Encoding::ASCII_8BIT].freeze
+          private_constant :ENCODINGS_TO_FIX
+          # A regular expression to detect errors in the response body
+          ERROR_PATTERN = /<\?xml\s[^>]*\?>\s*<Error>/.freeze
+          private_constant :ERROR_PATTERN
+          # A regular expression to match an error message in the response body
+          MESSAGE_PATTERN = %r{<Message>(.+?)</Message>}.freeze
+          private_constant :MESSAGE_PATTERN
           def call(context)
             @handler.call(context).on(200) do |response|
@@ -28,29 +41,37 @@ module Aws
           private
-          # Streaming outputs are not subject to 200 errors.
-          def streaming_output?(output)
-            if (payload = output[:payload_member])
-              # checking ref and shape
-              payload['streaming'] || payload.shape['streaming'] ||
-                payload.eventstream
-            else
-              false
+          def build_error(context, code, message)
+            S3::Errors.error_class(code).new(context, message)
+          end
+          def check_for_error(context)
+            xml = normalize_encoding(context.http_response.body_contents)
+            if xml.match?(ERROR_PATTERN)
+              error_code = xml.match(CODE_PATTERN)[1]
+              error_message = xml.match(MESSAGE_PATTERN)[1]
+              build_error(context, error_code, error_message)
+            elsif incomplete_xml_body?(xml, context.operation.output)
+              Seahorse::Client::NetworkingError.new(
+                build_error(context, 'InternalError', 'Empty or incomplete response body')
+              )
             end
           end
+          # Must have a member in the body and have the start of an XML Tag.
+          # Other incomplete xml bodies will result in an XML ParsingError.
+          def incomplete_xml_body?(xml, output)
+            members_in_body?(output) && !xml.match(/<\w/)
+          end
           # Checks if the output shape is a structure shape and has members that
           # are in the body for the case of a payload and a normal structure. A
           # non-structure shape will not have members in the body. In the case
           # of a string or blob, the body contents would have been checked first
           # before this method is called in incomplete_xml_body?.
           def members_in_body?(output)
-            shape =
-              if output[:payload_member]
-                output[:payload_member].shape
-              else
-                output.shape
-              end
+            shape = resolve_shape(output)
             if structure_shape?(shape)
               shape.members.any? { |_, k| k.location.nil? }
@@ -59,30 +80,33 @@ module Aws
             end
           end
-          def structure_shape?(shape)
-            shape.is_a?(Seahorse::Model::Shapes::StructureShape)
+          # Fixes encoding issues when S3 returns UTF-8 content with missing charset in Content-Type header or omits
+          # Content-Type header entirely.  Net::HTTP defaults to US-ASCII or ASCII-8BIT when charset is unspecified.
+          def normalize_encoding(xml)
+            return xml unless xml.is_a?(String) && ENCODINGS_TO_FIX.include?(xml.encoding)
+            xml.force_encoding('UTF-8')
           end
-          # Must have a member in the body and have the start of an XML Tag.
-          # Other incomplete xml bodies will result in an XML ParsingError.
-          def incomplete_xml_body?(xml, output)
-            members_in_body?(output) && !xml.match(/<\w/)
+          def resolve_shape(output)
+            return output.shape unless output[:payload_member]
+            output[:payload_member].shape
           end
-          def check_for_error(context)
-            xml = context.http_response.body_contents
-            if xml.match(/<\?xml\s[^>]*\?>\s*<Error>/)
-              error_code = xml.match(%r{<Code>(.+?)</Code>})[1]
-              error_message = xml.match(%r{<Message>(.+?)</Message>})[1]
-              S3::Errors.error_class(error_code).new(context, error_message)
-            elsif incomplete_xml_body?(xml, context.operation.output)
-              Seahorse::Client::NetworkingError.new(
-                S3::Errors
-                  .error_class('InternalError')
-                  .new(context, 'Empty or incomplete response body')
-              )
+          # Streaming outputs are not subject to 200 errors.
+          def streaming_output?(output)
+            if (payload = output[:payload_member])
+              # checking ref and shape
+              payload['streaming'] || payload.shape['streaming'] || payload.eventstream
+            else
+              false
             end
           end
+          def structure_shape?(shape)
+            shape.is_a?(Seahorse::Model::Shapes::StructureShape)
+          end
         end
         handler(Handler, step: :sign)

data/lib/aws-sdk-s3/transfer_manager.rb ADDED Viewed

@@ -0,0 +1,321 @@
+# frozen_string_literal: true
+module Aws
+  module S3
+    # A high-level S3 transfer utility that provides enhanced upload and download capabilities with automatic
+    # multipart handling, progress tracking, and handling of large files. The following features are supported:
+    #
+    # * upload a file with multipart upload
+    # * upload a stream with multipart upload
+    # * download an S3 object with multipart download
+    # * track transfer progress by using progress listener
+    #
+    # ## Executor Management
+    # TransferManager uses executors to handle concurrent operations during multipart transfers. You can control
+    # concurrency behavior by providing a custom executor or relying on the default executor management.
+    #
+    # ### Default Behavior
+    # When no `:executor` is provided, TransferManager creates a new DefaultExecutor for each individual
+    # operation (`download_file`, `upload_file`, etc.) and automatically shuts it down when that operation completes.
+    # Each operation gets its own isolated thread pool with the specified `:thread_count` (default 10 threads).
+    #
+    # ### Custom Executor
+    # You can provide your own executor (e.g., `Concurrent::ThreadPoolExecutor`) for fine-grained control over thread
+    # pools and resource management. When using a custom executor, you are responsible for shutting it down
+    # when finished. The executor may be reused across multiple TransferManager operations.
+    #
+    # Custom executors must implement the same interface as DefaultExecutor.
+    #
+    # **Required methods:**
+    #
+    #   * `post(*args, &block)` - Execute a task with given arguments and block
+    #   * `kill` - Immediately terminate all running tasks
+    #
+    # **Optional methods:**
+    #
+    #   * `shutdown(timeout = nil)` - Gracefully shutdown the executor with optional timeout
+    #
+    # @example Using default executor (automatic creation and shutdown)
+    #     tm = TransferManager.new # No executor provided
+    #     # DefaultExecutor created, used, and shutdown automatically
+    #     tm.download_file('/path/to/file', bucket: 'bucket', key: 'key')
+    #
+    # @example Using custom executor (manual shutdown required)
+    #     require 'concurrent-ruby'
+    #
+    #     executor = Concurrent::ThreadPoolExecutor.new(max_threads: 5)
+    #     tm = TransferManager.new(executor: executor)
+    #     tm.download_file('/path/to/file1', bucket: 'bucket', key: 'key1')
+    #     executor.shutdown # You must shutdown custom executors
+    #
+    class TransferManager
+      # @param [Hash] options
+      # @option options [S3::Client] :client (S3::Client.new)
+      #   The S3 client to use for {TransferManager} operations. If not provided, a new default client
+      #   will be created automatically.
+      # @option options [Object] :executor
+      #   The executor to use for multipart operations. Must implement the same interface as {DefaultExecutor}.
+      #   If not provided, a new {DefaultExecutor} will be created automatically for each operation and
+      #   shutdown after completion. When provided a custom executor, it will be reused across operations, and
+      #   you are responsible for shutting it down when finished.
+      def initialize(options = {})
+        @client = options[:client] || Client.new
+        @executor = options[:executor]
+      end
+      # @return [S3::Client]
+      attr_reader :client
+      # @return [Object]
+      attr_reader :executor
+      # Downloads a file in S3 to a path on disk.
+      #
+      #     # small files (< 5MB) are downloaded in a single API call
+      #     tm = TransferManager.new
+      #     tm.download_file('/path/to/file', bucket: 'bucket', key: 'key')
+      #
+      # Files larger than 5MB are downloaded using multipart method:
+      #
+      #     # large files are split into parts and the parts are downloaded in parallel
+      #     tm.download_file('/path/to/large_file', bucket: 'bucket', key: 'key')
+      #
+      # You can provide a callback to monitor progress of the download:
+      #
+      #     # bytes and part_sizes are each an array with 1 entry per part
+      #     # part_sizes may not be known until the first bytes are retrieved
+      #     progress = proc do |bytes, part_sizes, file_size|
+      #       bytes.map.with_index do |b, i|
+      #         puts "Part #{i + 1}: #{b} / #{part_sizes[i]} " + "Total: #{100.0 * bytes.sum / file_size}%"
+      #       end
+      #     end
+      #     tm.download_file('/path/to/file', bucket: 'bucket', key: 'key', progress_callback: progress)
+      #
+      # @param [String, Pathname, File, Tempfile] destination
+      #   Where to download the file to. This can either be a String or Pathname to the file, an open File object,
+      #   or an open Tempfile object. If you pass an open File or Tempfile object, then you are responsible for
+      #   closing it after the download completes. Download behavior varies by destination type:
+      #
+      #   * **String/Pathname paths**: Downloads to a temporary file first, then atomically moves to the final
+      #    destination. This prevents corruption of any existing file if the download fails.
+      #   * **File/Tempfile objects**: Downloads directly to the file object without using temporary files.
+      #    You are responsible for managing the file object's state and closing it after the download completes.
+      #    If the download fails, the file object may contain partial data.
+      #
+      # @param [String] bucket
+      #   The name of the S3 bucket to download from.
+      #
+      # @param [String] key
+      #   The object key name in S3 bucket.
+      #
+      # @param [Hash] options
+      #   Additional options for {Client#get_object} and {Client#head_object} may be provided.
+      #
+      # @option options [String] :mode ("auto") `"auto"`, `"single_request"` or `"get_range"`
+      #
+      #  * `"auto"` mode is enabled by default, which performs `multipart_download`
+      #  * `"single_request"` mode forces only 1 GET request to be made in download
+      #  * `"get_range"` mode requires the `:chunk_size` parameter to be configured to set the size of each range
+      #
+      # @option options [Integer] :chunk_size required in `"get_range"` mode.
+      #
+      # @option options [Integer] :thread_count (10) Customize threads used in the multipart download.
+      #   Only used when no custom executor is provided (creates {DefaultExecutor} with given thread count).
+      #
+      # @option options [String] :checksum_mode ("ENABLED")
+      #   This option is deprecated. Use `:response_checksum_validation` on your S3 client instead.
+      #   To disable checksum validation, set `response_checksum_validation: 'when_required'`
+      #   when creating your S3 client.
+      #
+      # @option options [Callable] :on_checksum_validated
+      #   Called each time a request's checksum is validated with the checksum algorithm and the
+      #   response.  For multipart downloads, this will be called for each part that is downloaded and validated.
+      #
+      # @option options [Proc] :progress_callback
+      #   A Proc that will be called when each chunk of the download is received. It will be invoked with
+      #   `bytes_read`, `part_sizes`, `file_size`. When the object is downloaded as parts (rather than by ranges),
+      #   the `part_sizes` will not be known ahead of time and will be `nil` in the callback until the first bytes
+      #   in the part are received.
+      #
+      # @raise [MultipartDownloadError] Raised when an object validation fails outside of service errors.
+      #
+      # @return [Boolean] Returns `true` when the file is downloaded without any errors.
+      #
+      # @see Client#get_object
+      # @see Client#head_object
+      def download_file(destination, bucket:, key:, **options)
+        download_opts = options.merge(bucket: bucket, key: key)
+        executor = @executor || DefaultExecutor.new(max_threads: download_opts.delete(:thread_count))
+        downloader = FileDownloader.new(client: @client, executor: executor)
+        downloader.download(destination, download_opts)
+        executor.shutdown unless @executor
+        true
+      end
+      # Uploads a file from disk to S3.
+      #
+      #     # small files are uploaded with the PutObject API
+      #     tm = TransferManager.new
+      #     tm.upload_file('/path/to/small_file', bucket: 'bucket', key: 'key')
+      #
+      # Files larger than or equal to `:multipart_threshold` are uploaded using multipart upload APIs.
+      #
+      #     # large files are automatically split into parts and the parts are uploaded in parallel
+      #     tm.upload_file('/path/to/large_file', bucket: 'bucket', key: 'key')
+      #
+      # The response of the S3 upload API is yielded if a block is given.
+      #
+      #     # API response will have etag value of the file
+      #     tm.upload_file('/path/to/file', bucket: 'bucket', key: 'key') do |response|
+      #       etag = response.etag
+      #     end
+      #
+      # You can provide a callback to monitor progress of the upload:
+      #
+      #     # bytes and totals are each an array with 1 entry per part
+      #     progress = proc do |bytes, totals|
+      #       bytes.map.with_index do |b, i|
+      #           puts "Part #{i + 1}: #{b} / #{totals[i]} " + "Total: #{100.0 * bytes.sum / totals.sum}%"
+      #       end
+      #     end
+      #     tm.upload_file('/path/to/file', bucket: 'bucket', key: 'key', progress_callback: progress)
+      #
+      # @param [String, Pathname, File, Tempfile] source
+      #   A file on the local file system that will be uploaded. This can either be a `String` or `Pathname` to the
+      #   file, an open `File` object, or an open `Tempfile` object. If you pass an open `File` or `Tempfile` object,
+      #   then you are responsible for closing it after the upload completes. When using an open Tempfile, rewind it
+      #   before uploading or else the object will be empty.
+      #
+      # @param [String] bucket
+      #   The name of the S3 bucket to upload to.
+      #
+      # @param [String] key
+      #   The object key name for the uploaded file.
+      #
+      # @param [Hash] options
+      #   Additional options for {Client#put_object} when the file size is below the multipart threshold.
+      #   For files larger than the multipart threshold, options for {Client#create_multipart_upload},
+      #   {Client#complete_multipart_upload}, and {Client#upload_part} can be provided.
+      #
+      # @option options [Integer] :multipart_threshold (104857600)
+      #   Files larger than or equal to `:multipart_threshold` are uploaded using the S3 multipart upload APIs.
+      #   Default threshold is `100MB`.
+      #
+      # @option options [Integer] :thread_count (10) Customize threads used in the multipart upload.
+      #   Only used when no custom executor is provided (creates {DefaultExecutor} with the given thread count).
+      #
+      # @option options [Integer] :http_chunk_size (16384) Size in bytes for each chunk when streaming request bodies
+      #   over HTTP. Controls the buffer size used when sending data to S3. Larger values may improve throughput by
+      #   reducing the number of network writes, but use more memory. Custom values must be at least 16KB.
+      #   Only Ruby MRI is supported.
+      #
+      # @option options [Proc] :progress_callback (nil)
+      #   A Proc that will be called when each chunk of the upload is sent.
+      #   It will be invoked with `[bytes_read]` and  `[total_sizes]`.
+      #
+      # @raise [MultipartUploadError] If a file is being uploaded in parts, and the upload can not be completed,
+      #   then the upload is aborted and this error is raised.  The raised error has a `#errors` method that
+      #   returns the failures that caused the upload to be aborted.
+      #
+      # @return [Boolean] Returns `true` when the file is uploaded without any errors.
+      #
+      # @see Client#put_object
+      # @see Client#create_multipart_upload
+      # @see Client#complete_multipart_upload
+      # @see Client#upload_part
+      def upload_file(source, bucket:, key:, **options)
+        upload_opts = options.merge(bucket: bucket, key: key)
+        http_chunk_size =
+          if defined?(JRUBY_VERSION)
+            nil
+          else
+            chunk = upload_opts.delete(:http_chunk_size)
+            if chunk && chunk < Aws::Plugins::ChecksumAlgorithm::DEFAULT_TRAILER_CHUNK_SIZE
+              raise ArgumentError, ':http_chunk_size must be at least 16384 bytes (16KB)'
+            end
+            chunk
+          end
+        executor = @executor || DefaultExecutor.new(max_threads: upload_opts.delete(:thread_count))
+        uploader = FileUploader.new(
+          multipart_threshold: upload_opts.delete(:multipart_threshold),
+          http_chunk_size: http_chunk_size,
+          client: @client,
+          executor: executor
+        )
+        response = uploader.upload(source, upload_opts)
+        yield response if block_given?
+        executor.shutdown unless @executor
+        true
+      end
+      # Uploads a stream in a streaming fashion to S3.
+      #
+      # Passed chunks are automatically split into multipart upload parts and the parts are uploaded in parallel.
+      # This allows for streaming uploads that never touch the disk.
+      #
+      # **Note**: There are known issues in JRuby until jruby-9.1.15.0, so avoid using this with older JRuby versions.
+      #
+      # @example Streaming chunks of data
+      #     tm = TransferManager.new
+      #     tm.upload_stream(bucket: 'bucket', key: 'key') do |write_stream|
+      #       10.times { write_stream << 'foo' }
+      #     end
+      # @example Streaming the output of a subprocess
+      #     tm.upload_stream(bucket: 'bucket', key: 'key') do |write_stream|
+      #       IO.copy_stream(IO.popen('ls'), write_stream)
+      #     end
+      # @example Streaming from STDIN
+      #     tm.upload_stream(bucket: 'bucket', key: 'key') do |write_stream|
+      #       IO.copy_stream(STDIN, write_stream)
+      #     end
+      #
+      # @param [String] bucket
+      #   The name of the S3 bucket to upload to.
+      #
+      # @param [String] key
+      #   The object key name for the uploaded file.
+      #
+      # @param [Hash] options
+      #   Additional options for {Client#create_multipart_upload}, {Client#complete_multipart_upload}, and
+      #   {Client#upload_part} can be provided.
+      #
+      # @option options [Integer] :thread_count (10)
+      #   The number of parallel multipart uploads. Only used when no custom executor is provided (creates
+      #   {DefaultExecutor} with the given thread count). An additional thread is used internally for task coordination.
+      #
+      # @option options [Boolean] :tempfile (false)
+      #   Normally read data is stored in memory when building the parts in order to complete the underlying
+      #   multipart upload. By passing `:tempfile => true`, the data read will be temporarily stored on disk reducing
+      #   the memory footprint vastly.
+      #
+      # @option options [Integer] :part_size (5242880)
+      #   Defines the size of each part, except the last. Default `:part_size` is `5 * 1024 * 1024`.
+      #
+      # @raise [MultipartUploadError] If an object is being uploaded in parts, and the upload can not be completed,
+      #   then the upload is aborted and this error is raised. The raised error has a `#errors` method that returns
+      #   the failures that caused the upload to be aborted.
+      #
+      # @return [Boolean] Returns `true` when the object is uploaded without any errors.
+      #
+      # @see Client#create_multipart_upload
+      # @see Client#complete_multipart_upload
+      # @see Client#upload_part
+      def upload_stream(bucket:, key:, **options, &block)
+        upload_opts = options.merge(bucket: bucket, key: key)
+        executor = @executor || DefaultExecutor.new(max_threads: upload_opts.delete(:thread_count))
+        uploader = MultipartStreamUploader.new(
+          client: @client,
+          executor: executor,
+          tempfile: upload_opts.delete(:tempfile),
+          part_size: upload_opts.delete(:part_size)
+        )
+        uploader.upload(upload_opts, &block)
+        executor.shutdown unless @executor
+        true
+      end
+    end
+  end
+end