aws-sdk-s3 1.176.1 → 1.208.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +202 -0
  3. data/VERSION +1 -1
  4. data/lib/aws-sdk-s3/bucket.rb +86 -35
  5. data/lib/aws-sdk-s3/bucket_acl.rb +7 -6
  6. data/lib/aws-sdk-s3/bucket_cors.rb +6 -5
  7. data/lib/aws-sdk-s3/bucket_lifecycle.rb +2 -2
  8. data/lib/aws-sdk-s3/bucket_lifecycle_configuration.rb +3 -3
  9. data/lib/aws-sdk-s3/bucket_logging.rb +2 -2
  10. data/lib/aws-sdk-s3/bucket_policy.rb +6 -5
  11. data/lib/aws-sdk-s3/bucket_request_payment.rb +3 -3
  12. data/lib/aws-sdk-s3/bucket_tagging.rb +3 -3
  13. data/lib/aws-sdk-s3/bucket_versioning.rb +42 -9
  14. data/lib/aws-sdk-s3/bucket_website.rb +3 -3
  15. data/lib/aws-sdk-s3/client.rb +3038 -1226
  16. data/lib/aws-sdk-s3/client_api.rb +492 -164
  17. data/lib/aws-sdk-s3/customizations/object.rb +76 -86
  18. data/lib/aws-sdk-s3/customizations.rb +4 -1
  19. data/lib/aws-sdk-s3/default_executor.rb +103 -0
  20. data/lib/aws-sdk-s3/encryption/client.rb +2 -2
  21. data/lib/aws-sdk-s3/encryption/default_cipher_provider.rb +2 -0
  22. data/lib/aws-sdk-s3/encryption/encrypt_handler.rb +2 -0
  23. data/lib/aws-sdk-s3/encryption/kms_cipher_provider.rb +2 -0
  24. data/lib/aws-sdk-s3/encryptionV2/client.rb +98 -23
  25. data/lib/aws-sdk-s3/encryptionV2/decrypt_handler.rb +7 -162
  26. data/lib/aws-sdk-s3/encryptionV2/decryption.rb +205 -0
  27. data/lib/aws-sdk-s3/encryptionV2/default_cipher_provider.rb +17 -0
  28. data/lib/aws-sdk-s3/encryptionV2/encrypt_handler.rb +2 -0
  29. data/lib/aws-sdk-s3/encryptionV2/io_encrypter.rb +2 -0
  30. data/lib/aws-sdk-s3/encryptionV2/kms_cipher_provider.rb +8 -0
  31. data/lib/aws-sdk-s3/encryptionV2/utils.rb +5 -0
  32. data/lib/aws-sdk-s3/encryptionV3/client.rb +885 -0
  33. data/lib/aws-sdk-s3/encryptionV3/decrypt_handler.rb +98 -0
  34. data/lib/aws-sdk-s3/encryptionV3/decryption.rb +244 -0
  35. data/lib/aws-sdk-s3/encryptionV3/default_cipher_provider.rb +159 -0
  36. data/lib/aws-sdk-s3/encryptionV3/default_key_provider.rb +35 -0
  37. data/lib/aws-sdk-s3/encryptionV3/encrypt_handler.rb +98 -0
  38. data/lib/aws-sdk-s3/encryptionV3/errors.rb +47 -0
  39. data/lib/aws-sdk-s3/encryptionV3/io_auth_decrypter.rb +60 -0
  40. data/lib/aws-sdk-s3/encryptionV3/io_decrypter.rb +35 -0
  41. data/lib/aws-sdk-s3/encryptionV3/io_encrypter.rb +84 -0
  42. data/lib/aws-sdk-s3/encryptionV3/key_provider.rb +28 -0
  43. data/lib/aws-sdk-s3/encryptionV3/kms_cipher_provider.rb +159 -0
  44. data/lib/aws-sdk-s3/encryptionV3/materials.rb +58 -0
  45. data/lib/aws-sdk-s3/encryptionV3/utils.rb +321 -0
  46. data/lib/aws-sdk-s3/encryption_v2.rb +1 -0
  47. data/lib/aws-sdk-s3/encryption_v3.rb +24 -0
  48. data/lib/aws-sdk-s3/endpoint_parameters.rb +17 -17
  49. data/lib/aws-sdk-s3/endpoint_provider.rb +562 -304
  50. data/lib/aws-sdk-s3/endpoints.rb +110 -0
  51. data/lib/aws-sdk-s3/errors.rb +11 -0
  52. data/lib/aws-sdk-s3/file_downloader.rb +189 -143
  53. data/lib/aws-sdk-s3/file_uploader.rb +9 -13
  54. data/lib/aws-sdk-s3/legacy_signer.rb +2 -1
  55. data/lib/aws-sdk-s3/multipart_download_error.rb +8 -0
  56. data/lib/aws-sdk-s3/multipart_file_uploader.rb +105 -102
  57. data/lib/aws-sdk-s3/multipart_stream_uploader.rb +96 -107
  58. data/lib/aws-sdk-s3/multipart_upload.rb +50 -6
  59. data/lib/aws-sdk-s3/multipart_upload_error.rb +3 -4
  60. data/lib/aws-sdk-s3/multipart_upload_part.rb +50 -34
  61. data/lib/aws-sdk-s3/object.rb +264 -137
  62. data/lib/aws-sdk-s3/object_acl.rb +12 -6
  63. data/lib/aws-sdk-s3/object_multipart_copier.rb +2 -1
  64. data/lib/aws-sdk-s3/object_summary.rb +179 -103
  65. data/lib/aws-sdk-s3/object_version.rb +25 -23
  66. data/lib/aws-sdk-s3/plugins/checksum_algorithm.rb +31 -0
  67. data/lib/aws-sdk-s3/plugins/endpoints.rb +1 -1
  68. data/lib/aws-sdk-s3/plugins/express_session_auth.rb +11 -20
  69. data/lib/aws-sdk-s3/plugins/md5s.rb +10 -71
  70. data/lib/aws-sdk-s3/plugins/streaming_retry.rb +5 -7
  71. data/lib/aws-sdk-s3/plugins/url_encoded_keys.rb +2 -1
  72. data/lib/aws-sdk-s3/presigner.rb +4 -5
  73. data/lib/aws-sdk-s3/resource.rb +7 -1
  74. data/lib/aws-sdk-s3/transfer_manager.rb +303 -0
  75. data/lib/aws-sdk-s3/types.rb +2907 -1059
  76. data/lib/aws-sdk-s3.rb +1 -1
  77. data/sig/bucket.rbs +16 -6
  78. data/sig/bucket_acl.rbs +1 -1
  79. data/sig/bucket_cors.rbs +1 -1
  80. data/sig/bucket_lifecycle.rbs +1 -1
  81. data/sig/bucket_lifecycle_configuration.rbs +1 -1
  82. data/sig/bucket_logging.rbs +1 -1
  83. data/sig/bucket_policy.rbs +1 -1
  84. data/sig/bucket_request_payment.rbs +1 -1
  85. data/sig/bucket_tagging.rbs +1 -1
  86. data/sig/bucket_versioning.rbs +3 -3
  87. data/sig/bucket_website.rbs +1 -1
  88. data/sig/client.rbs +226 -64
  89. data/sig/errors.rbs +2 -0
  90. data/sig/multipart_upload.rbs +9 -2
  91. data/sig/multipart_upload_part.rbs +5 -1
  92. data/sig/object.rbs +31 -15
  93. data/sig/object_acl.rbs +1 -1
  94. data/sig/object_summary.rbs +22 -15
  95. data/sig/object_version.rbs +5 -2
  96. data/sig/resource.rbs +11 -2
  97. data/sig/types.rbs +281 -64
  98. metadata +26 -10
  99. data/lib/aws-sdk-s3/plugins/skip_whole_multipart_get_checksums.rb +0 -31
data/lib/aws-sdk-s3/object_summary.rb
@@ -62,6 +62,18 @@ module Aws::S3
       data[:checksum_algorithm]
     end
 
+    # The checksum type that is used to calculate the object’s checksum
+    # value. For more information, see [Checking object integrity][1] in the
+    # *Amazon S3 User Guide*.
+    #
+    #
+    #
+    # [1]: https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
+    # @return [String]
+    def checksum_type
+      data[:checksum_type]
+    end
+
     # Size in bytes of the object.
     # @return [Integer]
     def size
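
The new `ObjectSummary#checksum_type` accessor reports whether the stored checksum is a full-object or a composite (per-part) value. A minimal usage sketch, assuming a hypothetical bucket and prefix:

    require 'aws-sdk-s3'

    # Hypothetical bucket/prefix; both accessors return nothing useful for
    # objects that were uploaded without checksums.
    summary = Aws::S3::Bucket.new('my-bucket').objects(prefix: 'logs/').first
    summary.checksum_algorithm # => e.g. ["CRC64NVME"]
    summary.checksum_type      # => e.g. "FULL_OBJECT" or "COMPOSITE"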
@@ -300,21 +312,19 @@ module Aws::S3
     #   you provide does not match the actual owner of the bucket, the request
     #   fails with the HTTP status code `403 Forbidden` (access denied).
     # @option options [String] :if_match
-    #   The `If-Match` header field makes the request method conditional on
-    #   ETags. If the ETag value does not match, the operation returns a `412
-    #   Precondition Failed` error. If the ETag matches or if the object
-    #   doesn't exist, the operation will return a `204 Success (No Content)
-    #   response`.
-    #
-    #   For more information about conditional requests, see [RFC 7232][1].
+    #   Deletes the object if the ETag (entity tag) value provided during the
+    #   delete operation matches the ETag of the object in S3. If the ETag
+    #   values do not match, the operation returns a `412 Precondition Failed`
+    #   error.
     #
-    #   <note markdown="1"> This functionality is only supported for directory buckets.
+    #   Expects the ETag value as a string. `If-Match` does accept a string
+    #   value of an '*' (asterisk) character to denote a match of any ETag.
     #
-    #   </note>
+    #   For more information about conditional requests, see [RFC 7232][1].
     #
     #
     #
-    #   [1]: https://docs.aws.amazon.com/https:/tools.ietf.org/html/rfc7232
+    #   [1]: https://tools.ietf.org/html/rfc7232
     # @option options [Time,DateTime,Date,Integer,String] :if_match_last_modified_time
     #   If present, the object is deleted only if its modification times
     #   matches the provided `Timestamp`. If the `Timestamp` values do not
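
With the reworded `:if_match` documentation, `ObjectSummary#delete` doubles as a conditional delete. A sketch with a hypothetical bucket, key, and ETag; a mismatched ETag typically surfaces as `Aws::S3::Errors::PreconditionFailed` (HTTP 412):

    obj = Aws::S3::ObjectSummary.new('my-bucket', 'report.csv')
    obj.delete(if_match: '"3f785ba2c87b49f2073050f18c1e3e9b"') # quoted, like the ETag header
    # obj.delete(if_match: '*') would match any ETag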
@@ -566,15 +576,6 @@ module Aws::S3
     #   fails with the HTTP status code `403 Forbidden` (access denied).
     # @option options [String] :checksum_mode
     #   To retrieve the checksum, this mode must be enabled.
-    #
-    #   **General purpose buckets** - In addition, if you enable checksum mode
-    #   and the object is uploaded with a [checksum][1] and encrypted with an
-    #   Key Management Service (KMS) key, you must have permission to use the
-    #   `kms:Decrypt` action to retrieve the checksum.
-    #
-    #
-    #
-    #   [1]: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Checksum.html
     # @return [Types::GetObjectOutput]
     def get(options = {}, &block)
       options = options.merge(
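
Although the KMS caveat was dropped from this doc block, `checksum_mode: 'ENABLED'` remains the switch that makes S3 return stored checksums on a GET. A hedged sketch (hypothetical bucket/key; the `checksum_crc64nvme` response member is assumed present in this SDK version):

    resp = Aws::S3::ObjectSummary.new('my-bucket', 'data.bin').get(checksum_mode: 'ENABLED')
    resp.checksum_crc64nvme # populated only when the object carries a CRC64NVME checksum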
@@ -848,7 +849,7 @@ module Aws::S3
     #     request_payer: "requester", # accepts requester
     #     bypass_governance_retention: false,
     #     expected_bucket_owner: "AccountId",
-    #     checksum_algorithm: "CRC32", # accepts CRC32, CRC32C, SHA1, SHA256
+    #     checksum_algorithm: "CRC32", # accepts CRC32, CRC32C, SHA1, SHA256, CRC64NVME
     #   })
     # @param options ({})
     # @option options [String] :mfa
@@ -916,6 +917,8 @@ module Aws::S3
     #
     #   * `CRC32C`
     #
+    #   * `CRC64NVME`
+    #
     #   * `SHA1`
     #
     #   * `SHA256`
@@ -925,9 +928,8 @@ module Aws::S3
     #
     #   If the individual checksum value you provide through
     #   `x-amz-checksum-algorithm ` doesn't match the checksum algorithm you
-    #   set through `x-amz-sdk-checksum-algorithm`, Amazon S3 ignores any
-    #   provided `ChecksumAlgorithm` parameter and uses the checksum algorithm
-    #   that matches the provided value in `x-amz-checksum-algorithm `.
+    #   set through `x-amz-sdk-checksum-algorithm`, Amazon S3 fails the
+    #   request with a `BadDigest` error.
     #
     #   If you provide an individual checksum, Amazon S3 ignores any provided
     #   `ChecksumAlgorithm` parameter.
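
The new `CRC64NVME` value and the stricter `BadDigest` failure mode apply to the underlying `Client#delete_objects` call. A sketch with hypothetical names:

    s3 = Aws::S3::Client.new
    s3.delete_objects(
      bucket: 'my-bucket',
      delete: { objects: [{ key: 'a.log' }, { key: 'b.log' }], quiet: true },
      checksum_algorithm: 'CRC64NVME' # the SDK computes the matching checksum header
    )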

data/lib/aws-sdk-s3/plugins/checksum_algorithm.rb (new file)
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module Aws
+  module S3
+    module Plugins
+      # @api private
+      class ChecksumAlgorithm < Seahorse::Client::Plugin
+
+        # S3 GetObject results for whole Multipart Objects contain a checksum
+        # that cannot be validated. These should be skipped by the
+        # ChecksumAlgorithm plugin.
+        class SkipWholeMultipartGetChecksumsHandler < Seahorse::Client::Handler
+          def call(context)
+            context[:http_checksum] ||= {}
+            context[:http_checksum][:skip_on_suffix] = true
+
+            @handler.call(context)
+          end
+        end
+
+        def add_handlers(handlers, _config)
+          handlers.add(
+            SkipWholeMultipartGetChecksumsHandler,
+            step: :initialize,
+            operations: [:get_object]
+          )
+        end
+      end
+    end
+  end
+end
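
For context: a whole-object GET of a multipart upload returns a composite checksum carrying a `-<part count>` suffix, which cannot be recomputed from the response body alone, so `:skip_on_suffix` tells aws-sdk-core's checksum validation to skip exactly those values. Illustrative only, with hypothetical names and response values:

    s3 = Aws::S3::Client.new
    resp = s3.get_object(bucket: 'my-bucket', key: 'big.bin', checksum_mode: 'ENABLED')
    resp.checksum_crc32 # => e.g. "q1XsMw==-5" (suffixed composite value, validation skipped)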

data/lib/aws-sdk-s3/plugins/endpoints.rb
@@ -25,7 +25,7 @@ The endpoint provider used to resolve endpoints. Any object that responds to
 
       option(
         :disable_s3_express_session_auth,
-        doc_type: 'Boolean',
+        doc_type: 'boolean',
         docstring: <<~DOCS) do |cfg|
           Parameter to indicate whether S3Express session auth should be disabled
         DOCS

data/lib/aws-sdk-s3/plugins/express_session_auth.rb
@@ -29,24 +29,17 @@ for different buckets.
       # @api private
       class Handler < Seahorse::Client::Handler
         def call(context)
-          if (props = context[:endpoint_properties])
-            # S3 Express endpoint - turn off md5 and enable crc32 default
-            if props['backend'] == 'S3Express'
-              if context.operation_name == :put_object || checksum_required?(context)
-                context[:default_request_checksum_algorithm] = 'CRC32'
-              end
-              context[:s3_express_endpoint] = true
-            end
+          context[:s3_express_endpoint] = true if s3_express_endpoint?(context)
 
-            # if s3 express auth, use new credentials and sign additional header
-            if context[:auth_scheme]['name'] == 'sigv4-s3express' &&
-               !context.config.disable_s3_express_session_auth
-              bucket = context.params[:bucket]
-              credentials_provider = context.config.express_credentials_provider
-              credentials = credentials_provider.express_credentials_for(bucket)
-              context[:sigv4_credentials] = credentials # Sign will use this
-            end
+          # if s3 express auth, use new credentials and sign additional header
+          if context[:auth_scheme]['name'] == 'sigv4-s3express' &&
+             !context.config.disable_s3_express_session_auth
+            bucket = context.params[:bucket]
+            credentials_provider = context.config.express_credentials_provider
+            credentials = credentials_provider.express_credentials_for(bucket)
+            context[:sigv4_credentials] = credentials # Sign will use this
           end
+
           with_metric(credentials) { @handler.call(context) }
         end
 
@@ -58,10 +51,8 @@
           Aws::Plugins::UserAgent.metric('S3_EXPRESS_BUCKET', &block)
         end
 
-        def checksum_required?(context)
-          context.operation.http_checksum_required ||
-            (context.operation.http_checksum &&
-             context.operation.http_checksum['requestChecksumRequired'])
+        def s3_express_endpoint?(context)
+          context[:endpoint_properties]['backend'] == 'S3Express'
         end
       end
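
The refactor does not change the session-auth flow itself: on an S3 Express endpoint, per-bucket express credentials are fetched and used for signing unless the config option from the previous hunk opts out:

    # Skip CreateSession-based signing and fall back to regular SigV4.
    client = Aws::S3::Client.new(disable_s3_express_session_auth: true)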
 
data/lib/aws-sdk-s3/plugins/md5s.rb
@@ -6,81 +6,20 @@ module Aws
   module S3
     module Plugins
       # @api private
-      # This plugin is effectively deprecated in favor of modeled
+      # This plugin is deprecated in favor of modeled
       # httpChecksumRequired traits.
       class Md5s < Seahorse::Client::Plugin
-        # These operations allow Content MD5 but are not required by
-        # httpChecksumRequired. This list should not grow.
-        OPTIONAL_OPERATIONS = [
-          :put_object,
-          :upload_part
-        ]
-
-        # @api private
-        class Handler < Seahorse::Client::Handler
-
-          CHUNK_SIZE = 1 * 1024 * 1024 # one MB
-
-          def call(context)
-            if !context[:checksum_algorithms] && # skip in favor of flexible checksum
-               !context[:s3_express_endpoint] # s3 express endpoints do not support md5
-              body = context.http_request.body
-              if body.respond_to?(:size) && body.size > 0
-                context.http_request.headers['Content-Md5'] ||= md5(body)
-              end
-            end
-            @handler.call(context)
-          end
-
-          private
-
-          # @param [File, Tempfile, IO#read, String] value
-          # @return [String<MD5>]
-          def md5(value)
-            if (File === value || Tempfile === value) && !value.path.nil? && File.exist?(value.path)
-              OpenSSL::Digest::MD5.file(value).base64digest
-            elsif value.respond_to?(:read)
-              md5 = OpenSSL::Digest::MD5.new
-              update_in_chunks(md5, value)
-              md5.base64digest
-            else
-              OpenSSL::Digest::MD5.digest(value).base64digest
-            end
-          end
-
-          def update_in_chunks(digest, io)
-            loop do
-              chunk = io.read(CHUNK_SIZE)
-              break unless chunk
-              digest.update(chunk)
-            end
-            io.rewind
-          end
-
-        end
-
         option(:compute_checksums,
-          default: true,
-          doc_type: 'Boolean',
-          docstring: <<-DOCS)
-          When `true` a MD5 checksum will be computed and sent in the Content Md5
-          header for :put_object and :upload_part. When `false`, MD5 checksums
-          will not be computed for these operations. Checksums are still computed
-          for operations requiring them. Checksum errors returned by Amazon S3 are
-          automatically retried up to `:retry_limit` times.
-          DOCS
-
-        def add_handlers(handlers, config)
-          if config.compute_checksums
-            # priority set low to ensure md5 is computed AFTER the request is
-            # built but before it is signed
-            handlers.add(
-              Handler,
-              priority: 10, step: :build, operations: OPTIONAL_OPERATIONS
-            )
-          end
+               default: true,
+               doc_type: 'Boolean',
+               docstring: <<~DOCS)
+          This option is deprecated. Please use `:request_checksum_calculation` instead.
+          When `false`, `request_checksum_calculation` is overridden to `when_required`.
+        DOCS
+
+        def after_initialize(client)
+          client.config.request_checksum_calculation = 'when_required' unless client.config.compute_checksums
        end
-
       end
     end
   end
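
The slimmed-down plugin now only bridges the legacy option onto the flexible-checksums configuration, exactly as `after_initialize` shows:

    client = Aws::S3::Client.new(compute_checksums: false)
    client.config.request_checksum_calculation # => "when_required"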

data/lib/aws-sdk-s3/plugins/streaming_retry.rb
@@ -62,18 +62,16 @@ module Aws
       class Handler < Seahorse::Client::Handler
 
         def call(context)
-          target = context.params[:response_target] || context[:response_target]
-
           # retry is only supported when range is NOT set on the initial request
-          if supported_target?(target) && !context.params[:range]
-            add_event_listeners(context, target)
+          if supported_target?(context) && !context.params[:range]
+            add_event_listeners(context)
           end
           @handler.call(context)
         end
 
         private
 
-        def add_event_listeners(context, target)
+        def add_event_listeners(context)
           context.http_response.on_headers(200..299) do
             case context.http_response.body
             when Seahorse::Client::BlockIO then
@@ -123,8 +121,8 @@
           context.http_response.body.is_a?(RetryableManagedFile)
         end
 
-        def supported_target?(target)
-          case target
+        def supported_target?(context)
+          case context[:response_target]
           when Proc, String, Pathname then true
           else false
           end
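
`supported_target?` still gates retry support on how the response is written: a path or Proc can be truncated and replayed, while an already-open IO cannot. A sketch with a hypothetical client and object:

    s3 = Aws::S3::Client.new
    # Retryable: the plugin manages the file and can resume after a streaming error.
    s3.get_object(bucket: 'my-bucket', key: 'big.bin', response_target: '/tmp/big.bin')
    # Not retryable by this plugin: an open IO is written to directly.
    File.open('/tmp/big.bin', 'wb') do |f|
      s3.get_object(bucket: 'my-bucket', key: 'big.bin', response_target: f)
    end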

data/lib/aws-sdk-s3/plugins/url_encoded_keys.rb
@@ -1,7 +1,8 @@
 # frozen_string_literal: true
 
 require 'uri'
-require 'cgi'
+require "cgi/escape"
+require "cgi/util" if RUBY_VERSION < "3.5"
 
 module Aws
   module S3

data/lib/aws-sdk-s3/presigner.rb
@@ -193,15 +193,14 @@ module Aws
         req, expires_in, secure, time, unsigned_headers, hoist = true
       )
         x_amz_headers = {}
-
         http_req = req.context.http_request
-
-        req.handlers.remove(Aws::S3::Plugins::S3Signer::LegacyHandler)
-        req.handlers.remove(Aws::Plugins::Sign::Handler)
         req.handlers.remove(Seahorse::Client::Plugins::ContentLength::Handler)
         req.handlers.remove(Aws::Rest::ContentTypeHandler)
+        req.handlers.remove(Aws::Plugins::ChecksumAlgorithm::OptionHandler)
+        req.handlers.remove(Aws::Plugins::ChecksumAlgorithm::ChecksumHandler)
         req.handlers.remove(Aws::Plugins::InvocationId::Handler)
-
+        req.handlers.remove(Aws::Plugins::Sign::Handler)
+        req.handlers.remove(Aws::S3::Plugins::S3Signer::LegacyHandler)
         req.handle(step: :send) do |context|
           # if an endpoint was not provided, force secure or insecure
           if context.config.regional_endpoint
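
Removing the checksum handlers keeps computed checksum headers out of presigned requests, where the eventual sender, not the signer, supplies the body. Presigner usage itself is unchanged:

    signer = Aws::S3::Presigner.new
    url = signer.presigned_url(:get_object, bucket: 'my-bucket', key: 'report.csv', expires_in: 900)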

data/lib/aws-sdk-s3/resource.rb
@@ -41,7 +41,7 @@ module Aws::S3
     #     acl: "private", # accepts private, public-read, public-read-write, authenticated-read
     #     bucket: "BucketName", # required
     #     create_bucket_configuration: {
-    #       location_constraint: "af-south-1", # accepts af-south-1, ap-east-1, ap-northeast-1, ap-northeast-2, ap-northeast-3, ap-south-1, ap-south-2, ap-southeast-1, ap-southeast-2, ap-southeast-3, ca-central-1, cn-north-1, cn-northwest-1, EU, eu-central-1, eu-north-1, eu-south-1, eu-south-2, eu-west-1, eu-west-2, eu-west-3, me-south-1, sa-east-1, us-east-2, us-gov-east-1, us-gov-west-1, us-west-1, us-west-2
+    #       location_constraint: "af-south-1", # accepts af-south-1, ap-east-1, ap-northeast-1, ap-northeast-2, ap-northeast-3, ap-south-1, ap-south-2, ap-southeast-1, ap-southeast-2, ap-southeast-3, ap-southeast-4, ap-southeast-5, ca-central-1, cn-north-1, cn-northwest-1, EU, eu-central-1, eu-central-2, eu-north-1, eu-south-1, eu-south-2, eu-west-1, eu-west-2, eu-west-3, il-central-1, me-central-1, me-south-1, sa-east-1, us-east-2, us-gov-east-1, us-gov-west-1, us-west-1, us-west-2
     #       location: {
     #         type: "AvailabilityZone", # accepts AvailabilityZone, LocalZone
     #         name: "LocationNameAsString",
@@ -50,6 +50,12 @@ module Aws::S3
     #         data_redundancy: "SingleAvailabilityZone", # accepts SingleAvailabilityZone, SingleLocalZone
     #         type: "Directory", # accepts Directory
     #       },
+    #       tags: [
+    #         {
+    #           key: "ObjectKey", # required
+    #           value: "Value", # required
+    #         },
+    #       ],
     #     },
     #     grant_full_control: "GrantFullControl",
     #     grant_read: "GrantRead",
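
The new `tags` member lets a bucket be tagged at creation time (per the S3 API reference, supported for directory buckets). A hedged sketch with hypothetical names:

    s3 = Aws::S3::Resource.new
    s3.create_bucket(
      bucket: 'my-data--usw2-az1--x-s3', # hypothetical directory bucket name
      create_bucket_configuration: {
        location: { type: 'AvailabilityZone', name: 'usw2-az1' },
        bucket: { data_redundancy: 'SingleAvailabilityZone', type: 'Directory' },
        tags: [{ key: 'env', value: 'dev' }]
      }
    )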

data/lib/aws-sdk-s3/transfer_manager.rb (new file)
@@ -0,0 +1,303 @@
+# frozen_string_literal: true
+
+module Aws
+  module S3
+    # A high-level S3 transfer utility that provides enhanced upload and download capabilities with automatic
+    # multipart handling, progress tracking, and handling of large files. The following features are supported:
+    #
+    # * upload a file with multipart upload
+    # * upload a stream with multipart upload
+    # * download a S3 object with multipart download
+    # * track transfer progress by using progress listener
+    #
+    # ## Executor Management
+    # TransferManager uses executors to handle concurrent operations during multipart transfers. You can control
+    # concurrency behavior by providing a custom executor or relying on the default executor management.
+    #
+    # ### Default Behavior
+    # When no `:executor` is provided, TransferManager creates a new DefaultExecutor for each individual
+    # operation (`download_file`, `upload_file`, etc.) and automatically shuts it down when that operation completes.
+    # Each operation gets its own isolated thread pool with the specified `:thread_count` (default 10 threads).
+    #
+    # ### Custom Executor
+    # You can provide your own executor (e.g., `Concurrent::ThreadPoolExecutor`) for fine-grained control over thread
+    # pools and resource management. When using a custom executor, you are responsible for shutting it down
+    # when finished. The executor may be reused across multiple TransferManager operations.
+    #
+    # Custom executors must implement the same interface as DefaultExecutor.
+    #
+    # **Required methods:**
+    #
+    # * `post(*args, &block)` - Execute a task with given arguments and block
+    # * `kill` - Immediately terminate all running tasks
+    #
+    # **Optional methods:**
+    #
+    # * `shutdown(timeout = nil)` - Gracefully shutdown the executor with optional timeout
+    #
+    # @example Using default executor (automatic creation and shutdown)
+    #   tm = TransferManager.new # No executor provided
+    #   # DefaultExecutor created, used, and shutdown automatically
+    #   tm.download_file('/path/to/file', bucket: 'bucket', key: 'key')
+    #
+    # @example Using custom executor (manual shutdown required)
+    #   require 'concurrent-ruby'
+    #
+    #   executor = Concurrent::ThreadPoolExecutor.new(max_threads: 5)
+    #   tm = TransferManager.new(executor: executor)
+    #   tm.download_file('/path/to/file1', bucket: 'bucket', key: 'key1')
+    #   executor.shutdown # You must shutdown custom executors
+    #
+    class TransferManager
+
+      # @param [Hash] options
+      # @option options [S3::Client] :client (S3::Client.new)
+      #   The S3 client to use for {TransferManager} operations. If not provided, a new default client
+      #   will be created automatically.
+      # @option options [Object] :executor
+      #   The executor to use for multipart operations. Must implement the same interface as {DefaultExecutor}.
+      #   If not provided, a new {DefaultExecutor} will be created automatically for each operation and
+      #   shutdown after completion. When provided a custom executor, it will be reused across operations, and
+      #   you are responsible for shutting it down when finished.
+      def initialize(options = {})
+        @client = options[:client] || Client.new
+        @executor = options[:executor]
+      end
+
+      # @return [S3::Client]
+      attr_reader :client
+
+      # @return [Object]
+      attr_reader :executor
+
+      # Downloads a file in S3 to a path on disk.
+      #
+      #   # small files (< 5MB) are downloaded in a single API call
+      #   tm = TransferManager.new
+      #   tm.download_file('/path/to/file', bucket: 'bucket', key: 'key')
+      #
+      # Files larger than 5MB are downloaded using the multipart method:
+      #
+      #   # large files are split into parts and the parts are downloaded in parallel
+      #   tm.download_file('/path/to/large_file', bucket: 'bucket', key: 'key')
+      #
+      # You can provide a callback to monitor progress of the download:
+      #
+      #   # bytes and part_sizes are each an array with 1 entry per part
+      #   # part_sizes may not be known until the first bytes are retrieved
+      #   progress = proc do |bytes, part_sizes, file_size|
+      #     bytes.map.with_index do |b, i|
+      #       puts "Part #{i + 1}: #{b} / #{part_sizes[i]} " + "Total: #{100.0 * bytes.sum / file_size}%"
+      #     end
+      #   end
+      #   tm.download_file('/path/to/file', bucket: 'bucket', key: 'key', progress_callback: progress)
+      #
+      # @param [String, Pathname, File, Tempfile] destination
+      #   Where to download the file to. This can either be a String or Pathname to the file, an open File object,
+      #   or an open Tempfile object. If you pass an open File or Tempfile object, then you are responsible for
+      #   closing it after the download completes. Download behavior varies by destination type:
+      #
+      #   * **String/Pathname paths**: Downloads to a temporary file first, then atomically moves to the final
+      #     destination. This prevents corruption of any existing file if the download fails.
+      #   * **File/Tempfile objects**: Downloads directly to the file object without using temporary files.
+      #     You are responsible for managing the file object's state and closing it after the download completes.
+      #     If the download fails, the file object may contain partial data.
+      #
+      # @param [String] bucket
+      #   The name of the S3 bucket to download from.
+      #
+      # @param [String] key
+      #   The object key name in the S3 bucket.
+      #
+      # @param [Hash] options
+      #   Additional options for {Client#get_object} and {Client#head_object} may be provided.
+      #
+      # @option options [String] :mode ("auto") `"auto"`, `"single_request"` or `"get_range"`
+      #
+      #   * `"auto"` mode is enabled by default, which performs `multipart_download`
+      #   * `"single_request"` mode forces a single GET request for the whole download
+      #   * `"get_range"` mode requires the `:chunk_size` parameter to be configured, customizing each range size
+      #
+      # @option options [Integer] :chunk_size required in `"get_range"` mode.
+      #
+      # @option options [Integer] :thread_count (10) Customize threads used in the multipart download.
+      #   Only used when no custom executor is provided (creates {DefaultExecutor} with given thread count).
+      #
+      # @option options [String] :checksum_mode ("ENABLED")
+      #   This option is deprecated. Use `:response_checksum_validation` on your S3 client instead.
+      #   To disable checksum validation, set `response_checksum_validation: 'when_required'`
+      #   when creating your S3 client.
+      #
+      # @option options [Callable] :on_checksum_validated
+      #   Called each time a request's checksum is validated with the checksum algorithm and the
+      #   response. For multipart downloads, this will be called for each part that is downloaded and validated.
+      #
+      # @option options [Proc] :progress_callback
+      #   A Proc that will be called when each chunk of the download is received. It will be invoked with
+      #   `bytes_read`, `part_sizes`, `file_size`. When the object is downloaded as parts (rather than by ranges),
+      #   the `part_sizes` will not be known ahead of time and will be `nil` in the callback until the first bytes
+      #   in the part are received.
+      #
+      # @raise [MultipartDownloadError] Raised when an object validation fails outside of service errors.
+      #
+      # @return [Boolean] Returns `true` when the file is downloaded without any errors.
+      #
+      # @see Client#get_object
+      # @see Client#head_object
+      def download_file(destination, bucket:, key:, **options)
+        download_opts = options.merge(bucket: bucket, key: key)
+        executor = @executor || DefaultExecutor.new(max_threads: download_opts.delete(:thread_count))
+        downloader = FileDownloader.new(client: @client, executor: executor)
+        downloader.download(destination, download_opts)
+        executor.shutdown unless @executor
+        true
+      end
+
+      # Uploads a file from disk to S3.
+      #
+      #   # small files are uploaded with the PutObject API
+      #   tm = TransferManager.new
+      #   tm.upload_file('/path/to/small_file', bucket: 'bucket', key: 'key')
+      #
+      # Files larger than or equal to `:multipart_threshold` are uploaded using multipart upload APIs.
+      #
+      #   # large files are automatically split into parts and the parts are uploaded in parallel
+      #   tm.upload_file('/path/to/large_file', bucket: 'bucket', key: 'key')
+      #
+      # The response of the S3 upload API is yielded if a block is given.
+      #
+      #   # API response will have etag value of the file
+      #   tm.upload_file('/path/to/file', bucket: 'bucket', key: 'key') do |response|
+      #     etag = response.etag
+      #   end
+      #
+      # You can provide a callback to monitor progress of the upload:
+      #
+      #   # bytes and totals are each an array with 1 entry per part
+      #   progress = proc do |bytes, totals|
+      #     bytes.map.with_index do |b, i|
+      #       puts "Part #{i + 1}: #{b} / #{totals[i]} " + "Total: #{100.0 * bytes.sum / totals.sum}%"
+      #     end
+      #   end
+      #   tm.upload_file('/path/to/file', bucket: 'bucket', key: 'key', progress_callback: progress)
+      #
+      # @param [String, Pathname, File, Tempfile] source
+      #   A file on the local file system that will be uploaded. This can either be a `String` or `Pathname` to the
+      #   file, an open `File` object, or an open `Tempfile` object. If you pass an open `File` or `Tempfile` object,
+      #   then you are responsible for closing it after the upload completes. When using an open Tempfile, rewind it
+      #   before uploading or else the object will be empty.
+      #
+      # @param [String] bucket
+      #   The name of the S3 bucket to upload to.
+      #
+      # @param [String] key
+      #   The object key name for the uploaded file.
+      #
+      # @param [Hash] options
+      #   Additional options for {Client#put_object} when the file size is below the multipart threshold.
+      #   For files larger than the multipart threshold, options for {Client#create_multipart_upload},
+      #   {Client#complete_multipart_upload}, and {Client#upload_part} can be provided.
+      #
+      # @option options [Integer] :multipart_threshold (104857600)
+      #   Files larger than or equal to `:multipart_threshold` are uploaded using the S3 multipart upload APIs.
+      #   Default threshold is `100MB`.
+      #
+      # @option options [Integer] :thread_count (10) Customize threads used in the multipart upload.
+      #   Only used when no custom executor is provided (creates {DefaultExecutor} with the given thread count).
+      #
+      # @option options [Proc] :progress_callback (nil)
+      #   A Proc that will be called when each chunk of the upload is sent.
+      #   It will be invoked with `[bytes_read]` and `[total_sizes]`.
+      #
+      # @raise [MultipartUploadError] If a file is being uploaded in parts, and the upload can not be completed,
+      #   then the upload is aborted and this error is raised. The raised error has a `#errors` method that
+      #   returns the failures that caused the upload to be aborted.
+      #
+      # @return [Boolean] Returns `true` when the file is uploaded without any errors.
+      #
+      # @see Client#put_object
+      # @see Client#create_multipart_upload
+      # @see Client#complete_multipart_upload
+      # @see Client#upload_part
+      def upload_file(source, bucket:, key:, **options)
+        upload_opts = options.merge(bucket: bucket, key: key)
+        executor = @executor || DefaultExecutor.new(max_threads: upload_opts.delete(:thread_count))
+        uploader = FileUploader.new(
+          multipart_threshold: upload_opts.delete(:multipart_threshold),
+          client: @client,
+          executor: executor
+        )
+        response = uploader.upload(source, upload_opts)
+        yield response if block_given?
+        executor.shutdown unless @executor
+        true
+      end
+
+      # Uploads a stream in a streaming fashion to S3.
+      #
+      # Passed chunks are automatically split into multipart upload parts and the parts are uploaded in parallel.
+      # This allows for streaming uploads that never touch the disk.
+      #
+      # **Note**: There are known issues in JRuby until jruby-9.1.15.0, so avoid using this with older JRuby versions.
+      #
+      # @example Streaming chunks of data
+      #   tm = TransferManager.new
+      #   tm.upload_stream(bucket: 'bucket', key: 'key') do |write_stream|
+      #     10.times { write_stream << 'foo' }
+      #   end
+      # @example Streaming chunks of data
+      #   tm.upload_stream(bucket: 'bucket', key: 'key') do |write_stream|
+      #     IO.copy_stream(IO.popen('ls'), write_stream)
+      #   end
+      # @example Streaming chunks of data
+      #   tm.upload_stream(bucket: 'bucket', key: 'key') do |write_stream|
+      #     IO.copy_stream(STDIN, write_stream)
+      #   end
+      #
+      # @param [String] bucket
+      #   The name of the S3 bucket to upload to.
+      #
+      # @param [String] key
+      #   The object key name for the uploaded file.
+      #
+      # @param [Hash] options
+      #   Additional options for {Client#create_multipart_upload}, {Client#complete_multipart_upload}, and
+      #   {Client#upload_part} can be provided.
+      #
+      # @option options [Integer] :thread_count (10)
+      #   The number of parallel multipart uploads. Only used when no custom executor is provided (creates
+      #   {DefaultExecutor} with the given thread count). An additional thread is used internally for task coordination.
+      #
+      # @option options [Boolean] :tempfile (false)
+      #   Normally read data is stored in memory when building the parts in order to complete the underlying
+      #   multipart upload. By passing `:tempfile => true`, the data read will be temporarily stored on disk,
+      #   vastly reducing the memory footprint.
+      #
+      # @option options [Integer] :part_size (5242880)
+      #   Define how big each part, except the last, should be. Default `:part_size` is `5 * 1024 * 1024`.
+      #
+      # @raise [MultipartUploadError] If an object is being uploaded in parts, and the upload can not be completed,
+      #   then the upload is aborted and this error is raised. The raised error has a `#errors` method that returns
+      #   the failures that caused the upload to be aborted.
+      #
+      # @return [Boolean] Returns `true` when the object is uploaded without any errors.
+      #
+      # @see Client#create_multipart_upload
+      # @see Client#complete_multipart_upload
+      # @see Client#upload_part
+      def upload_stream(bucket:, key:, **options, &block)
+        upload_opts = options.merge(bucket: bucket, key: key)
+        executor = @executor || DefaultExecutor.new(max_threads: upload_opts.delete(:thread_count))
+        uploader = MultipartStreamUploader.new(
+          client: @client,
+          executor: executor,
+          tempfile: upload_opts.delete(:tempfile),
+          part_size: upload_opts.delete(:part_size)
+        )
+        uploader.upload(upload_opts, &block)
+        executor.shutdown unless @executor
+        true
+      end
+    end
+  end
+end
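
Putting the documented options together, a short end-to-end sketch (bucket, key, and paths hypothetical):

    tm = Aws::S3::TransferManager.new
    tm.upload_file('/tmp/archive.bin', bucket: 'my-bucket', key: 'archive.bin')
    tm.download_file(
      '/tmp/archive-copy.bin',
      bucket: 'my-bucket', key: 'archive.bin',
      mode: 'get_range', chunk_size: 8 * 1024 * 1024 # 8 MiB per ranged GET
    )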