aws-sdk-s3 1.9.0 → 1.24.1

@@ -24,15 +24,18 @@ module Aws
         @mode = options[:mode] || "auto"
         @thread_count = options[:thread_count] || THREAD_COUNT
         @chunk_size = options[:chunk_size]
-        @bucket = options[:bucket]
-        @key = options[:key]
+        @params = {
+          bucket: options[:bucket],
+          key: options[:key],
+        }
+        @params[:version_id] = options[:version_id] if options[:version_id]

         case @mode
         when "auto" then multipart_download
         when "single_request" then single_request
         when "get_range"
           if @chunk_size
-            resp = @client.head_object(bucket: @bucket, key: @key)
+            resp = @client.head_object(@params)
             multithreaded_get_by_ranges(construct_chunks(resp.content_length))
           else
             msg = "In :get_range mode, :chunk_size must be provided"
@@ -48,7 +51,7 @@ module Aws
       private

       def multipart_download
-        resp = @client.head_object(bucket: @bucket, key: @key, part_number: 1)
+        resp = @client.head_object(@params.merge(part_number: 1))
         count = resp.parts_count
         if count.nil? || count <= 1
           resp.content_length < MIN_CHUNK_SIZE ?
@@ -56,7 +59,7 @@ module Aws
           multithreaded_get_by_ranges(construct_chunks(resp.content_length))
         else
           # partNumber is an option
-          resp = @client.head_object(bucket: @bucket, key: @key)
+          resp = @client.head_object(@params)
           resp.content_length < MIN_CHUNK_SIZE ?
             single_request :
             compute_mode(resp.content_length, count)
@@ -112,9 +115,7 @@ module Aws
           batch.each do |chunk|
             threads << Thread.new do
               resp = @client.get_object(
-                :bucket => @bucket,
-                :key => @key,
-                param.to_sym => chunk
+                @params.merge(param.to_sym => chunk)
               )
               write(resp)
             end
@@ -131,7 +132,7 @@ module Aws

       def single_request
         @client.get_object(
-          bucket: @bucket, key: @key, response_target: @path
+          @params.merge(response_target: @path)
         )
       end
     end
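
The five hunks above replace the separate @bucket/@key ivars with a single @params hash, which is what lets the downloader forward an optional :version_id to every head_object/get_object call. A minimal sketch of the resulting caller-facing behavior; bucket, key, and version id are placeholders:

    require 'aws-sdk-s3'

    obj = Aws::S3::Resource.new.bucket('my-bucket').object('my-key')
    # :version_id is merged into @params and sent with each request
    obj.download_file('/tmp/my-key', version_id: 'example-version-id')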
@@ -56,14 +56,12 @@ module Aws
       end

       def read_from_file(bytes, output_buffer)
-        if bytes
-          data = @file.read([remaining_bytes, bytes].min)
-          data = nil if data == ''
-        else
-          data = @file.read(remaining_bytes)
-        end
+        length = [remaining_bytes, *bytes].min
+        data = @file.read(length, output_buffer)
+
         @position += data ? data.bytesize : 0
-        output_buffer ? output_buffer.replace(data || '') : data
+
+        data.to_s unless bytes && (data.nil? || data.empty?)
       end

       def remaining_bytes
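
The rewritten read_from_file leans on two Ruby idioms: splatting a possibly-nil bytes argument ([x, *nil] is just [x]), and IO#read's second argument, which fills a caller-supplied buffer instead of allocating a fresh string. In isolation, with a placeholder file path:

    [100, *nil].min # => 100 (nil splats to nothing)
    [100, *25].min  # => 25

    buffer = String.new
    File.open('/tmp/example', 'rb') do |io|
      io.read(64, buffer) # reads up to 64 bytes directly into buffer
    end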
@@ -45,9 +45,7 @@ module Aws

       def open_file(source)
         if String === source || Pathname === source
-          file = File.open(source, 'rb')
-          yield(file)
-          file.close
+          File.open(source, 'rb') { |file| yield(file) }
         else
           yield(source)
         end
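
The block form of File.open is more than a style cleanup: it closes the handle in an internal ensure, so the descriptor is released even when the yielded block raises, which the old open/yield/close sequence did not guarantee. The two forms compare roughly as:

    # new: closed even on error
    File.open(source, 'rb') { |file| yield(file) }

    # old: file.close is skipped if yield raises
    file = File.open(source, 'rb')
    yield(file)
    file.close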
@@ -0,0 +1,160 @@
+require 'thread'
+require 'set'
+require 'tempfile'
+require 'stringio'
+
+module Aws
+  module S3
+    # @api private
+    class MultipartStreamUploader
+      # api private
+      PART_SIZE = 5 * 1024 * 1024 # 5MB
+
+      # api private
+      THREAD_COUNT = 10
+
+      # api private
+      TEMPFILE_PREIX = 'aws-sdk-s3-upload_stream'.freeze
+
+      # @api private
+      CREATE_OPTIONS =
+        Set.new(Client.api.operation(:create_multipart_upload).input.shape.member_names)
+
+      # @api private
+      UPLOAD_PART_OPTIONS =
+        Set.new(Client.api.operation(:upload_part).input.shape.member_names)
+
+      # @option options [Client] :client
+      def initialize(options = {})
+        @client = options[:client] || Client.new
+        @tempfile = options[:tempfile]
+        @part_size = options[:part_size] || PART_SIZE
+        @thread_count = options[:thread_count] || THREAD_COUNT
+      end
+
+      # @return [Client]
+      attr_reader :client
+
+      # @option options [required,String] :bucket
+      # @option options [required,String] :key
+      # @return [void]
+      def upload(options = {}, &block)
+        upload_id = initiate_upload(options)
+        parts = upload_parts(upload_id, options, &block)
+        complete_upload(upload_id, parts, options)
+      end
+
+      private
+
+      def initiate_upload(options)
+        @client.create_multipart_upload(create_opts(options)).upload_id
+      end
+
+      def complete_upload(upload_id, parts, options)
+        @client.complete_multipart_upload(
+          bucket: options[:bucket],
+          key: options[:key],
+          upload_id: upload_id,
+          multipart_upload: { parts: parts })
+      end
+
+      def upload_parts(upload_id, options, &block)
+        completed = Queue.new
+        errors = IO.pipe do |read_pipe, write_pipe|
+          threads = upload_in_threads(read_pipe, completed, upload_part_opts(options).merge(upload_id: upload_id))
+          block.call(write_pipe)
+          write_pipe.close
+          threads.map(&:value).compact
+        end
+        if errors.empty?
+          Array.new(completed.size) { completed.pop }.sort_by { |part| part[:part_number] }
+        else
+          abort_upload(upload_id, options, errors)
+        end
+      end
+
+      def abort_upload(upload_id, options, errors)
+        @client.abort_multipart_upload(
+          bucket: options[:bucket],
+          key: options[:key],
+          upload_id: upload_id
+        )
+        msg = "multipart upload failed: #{errors.map(&:message).join("; ")}"
+        raise MultipartUploadError.new(msg, errors)
+      rescue MultipartUploadError => error
+        raise error
+      rescue => error
+        msg = "failed to abort multipart upload: #{error.message}"
+        raise MultipartUploadError.new(msg, errors + [error])
+      end
+
+      def create_opts(options)
+        CREATE_OPTIONS.inject({}) do |hash, key|
+          hash[key] = options[key] if options.key?(key)
+          hash
+        end
+      end
+
+      def upload_part_opts(options)
+        UPLOAD_PART_OPTIONS.inject({}) do |hash, key|
+          hash[key] = options[key] if options.key?(key)
+          hash
+        end
+      end
+
+      def read_to_part_body(read_pipe)
+        return if read_pipe.closed?
+        temp_io = @tempfile ? Tempfile.new(TEMPFILE_PREIX) : StringIO.new
+        temp_io.binmode
+        bytes_copied = IO.copy_stream(read_pipe, temp_io, @part_size)
+        temp_io.rewind
+        if bytes_copied == 0
+          if Tempfile === temp_io
+            temp_io.close
+            temp_io.unlink
+          end
+          nil
+        else
+          temp_io
+        end
+      end
+
+      def upload_in_threads(read_pipe, completed, options)
+        mutex = Mutex.new
+        part_number = 0
+        @thread_count.times.map do
+          thread = Thread.new do
+            begin
+              loop do
+                body, thread_part_number = mutex.synchronize do
+                  [read_to_part_body(read_pipe), part_number += 1]
+                end
+                break unless (body || thread_part_number == 1)
+                begin
+                  part = options.merge(
+                    body: body,
+                    part_number: thread_part_number,
+                  )
+                  resp = @client.upload_part(part)
+                  completed << {etag: resp.etag, part_number: part[:part_number]}
+                ensure
+                  if Tempfile === body
+                    body.close
+                    body.unlink
+                  end
+                end
+              end
+              nil
+            rescue => error
+              # keep other threads from uploading other parts
+              mutex.synchronize { read_pipe.close_read }
+              error
+            end
+          end
+          thread.abort_on_exception = true
+          thread
+        end
+      end
+    end
+  end
+end
@@ -66,7 +66,7 @@ module Aws::S3
       data[:etag]
     end

-    # Size of the uploaded part data.
+    # Size in bytes of the uploaded part data.
     # @return [Integer]
     def size
       data[:size]
@@ -866,7 +866,7 @@ module Aws::S3
    #   http://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectsinRequesterPaysBuckets.html
    # @option options [String] :tagging
    #   The tag-set for the object. The tag-set must be encoded as URL Query
-   #   parameters
+   #   parameters. (For example, "Key1=Value1")
    # @return [Types::PutObjectOutput]
    def put(options = {})
      options = options.merge(
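
The clarified :tagging doc is worth an example, since the option takes a URL-encoded query string rather than a hash; bucket and key below are placeholders:

    obj = Aws::S3::Object.new('my-bucket', 'my-key')
    obj.put(body: 'hello', tagging: 'Key1=Value1&Key2=Value2')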
@@ -898,11 +898,14 @@ module Aws::S3
    #         record_delimiter: "RecordDelimiter",
    #         field_delimiter: "FieldDelimiter",
    #         quote_character: "QuoteCharacter",
+   #         allow_quoted_record_delimiter: false,
    #       },
-   #       compression_type: "NONE", # accepts NONE, GZIP
+   #       compression_type: "NONE", # accepts NONE, GZIP, BZIP2
    #       json: {
    #         type: "DOCUMENT", # accepts DOCUMENT, LINES
    #       },
+   #       parquet: {
+   #       },
    #     },
    #     expression_type: "SQL", # required, accepts SQL
    #     expression: "Expression", # required
@@ -709,7 +709,7 @@ module Aws::S3
    #   http://docs.aws.amazon.com/AmazonS3/latest/dev/ObjectsinRequesterPaysBuckets.html
    # @option options [String] :tagging
    #   The tag-set for the object. The tag-set must be encoded as URL Query
-   #   parameters
+   #   parameters. (For example, "Key1=Value1")
    # @return [Types::PutObjectOutput]
    def put(options = {})
      options = options.merge(
@@ -741,11 +741,14 @@ module Aws::S3
    #         record_delimiter: "RecordDelimiter",
    #         field_delimiter: "FieldDelimiter",
    #         quote_character: "QuoteCharacter",
+   #         allow_quoted_record_delimiter: false,
    #       },
-   #       compression_type: "NONE", # accepts NONE, GZIP
+   #       compression_type: "NONE", # accepts NONE, GZIP, BZIP2
    #       json: {
    #         type: "DOCUMENT", # accepts DOCUMENT, LINES
    #       },
+   #       parquet: {
+   #       },
    #     },
    #     expression_type: "SQL", # required, accepts SQL
    #     expression: "Expression", # required
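
Both copies of the documented select_object_content example gain the same three additions: the allow_quoted_record_delimiter CSV flag, BZIP2 compression, and Parquet input. A trimmed sketch of a call exercising the new options; the event-handler block follows the SDK's documented streaming pattern, and bucket, key, and expression are placeholders:

    client = Aws::S3::Client.new
    client.select_object_content(
      bucket: 'my-bucket',
      key: 'data.csv.bz2',
      expression_type: 'SQL',
      expression: 'SELECT * FROM S3Object s LIMIT 5',
      input_serialization: {
        csv: { allow_quoted_record_delimiter: false },
        compression_type: 'BZIP2' # newly accepted alongside NONE and GZIP
      },
      output_serialization: { csv: {} }
    ) do |stream|
      stream.on_records_event { |event| puts event.payload.read }
    end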
@@ -60,6 +60,8 @@ each bucket. [Go here for more information](http://docs.aws.amazon.com/AmazonS3
           endpoint.port = 443
           endpoint.host = "#{bucket_name}.s3-accelerate.amazonaws.com"
           context.http_request.endpoint = endpoint.to_s
+          # s3 accelerate endpoint doesn't work with 'expect' header
+          context.http_request.headers.delete('expect')
         end

         def use_combined_accelerate_dualstack_endpoint(context)
@@ -70,6 +72,8 @@ each bucket. [Go here for more information](http://docs.aws.amazon.com/AmazonS3
           endpoint.port = 443
           endpoint.host = "#{bucket_name}.s3-accelerate.dualstack.amazonaws.com"
           context.http_request.endpoint = endpoint.to_s
+          # s3 accelerate endpoint doesn't work with 'expect' header
+          context.http_request.headers.delete('expect')
         end

         def validate_bucket_name!(bucket_name)
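
Both accelerate code paths now drop the 'expect' header, since the transfer-acceleration endpoint rejects 100-continue handshakes. The plugin only rewrites endpoints for clients that opt in, e.g. (the bucket must have acceleration enabled; names are placeholders):

    s3 = Aws::S3::Client.new(use_accelerate_endpoint: true)
    s3.put_object(bucket: 'my-accelerated-bucket', key: 'big-file',
                  body: File.open('/tmp/big-file', 'rb'))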
@@ -45,7 +45,7 @@ module Aws
       end

       def update_in_chunks(digest, io)
-        while chunk = io.read(CHUNK_SIZE)
+        while chunk = io.read(CHUNK_SIZE, buffer ||= "")
           digest.update(chunk)
         end
         io.rewind
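
Passing a buffer as io.read's second argument reuses one string across iterations instead of allocating a fresh chunk each time; buffer ||= "" creates it lazily on the first pass, and Digest#update copies the bytes immediately, so the reuse is safe. The same pattern in isolation, with a placeholder path:

    require 'digest/md5'

    digest = Digest::MD5.new
    buffer = nil
    File.open('/tmp/large-file', 'rb') do |io|
      while chunk = io.read(1024 * 1024, buffer ||= "")
        digest.update(chunk) # buffer is overwritten by the next read
      end
    end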
@@ -18,7 +18,9 @@ by Amazon S3.
         response = @handler.call(context)
         if context.http_response.status_code == 307
           endpoint = context.http_response.headers['location']
-          context.http_request.endpoint = endpoint
+          unless context.http_request.endpoint.host.include?('fips')
+            context.http_request.endpoint = endpoint
+          end
           context.http_response.body.truncate(0)
           @handler.call(context)
         else
@@ -113,7 +113,7 @@ module Aws
       private

       def handle_region_errors(response)
-        if wrong_sigv4_region?(response)
+        if wrong_sigv4_region?(response) && !fips_region?(response)
           get_region_and_retry(response.context)
         else
           response
@@ -133,6 +133,10 @@ module Aws
         S3::BUCKET_REGIONS[context.params[:bucket]] = actual_region
       end

+      def fips_region?(resp)
+        resp.context.http_request.endpoint.host.include?('fips')
+      end
+
       def wrong_sigv4_region?(resp)
         resp.context.http_response.status_code == 400 &&
           (
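
Together with the 307-redirect guard earlier in this diff, this substring check on the endpoint host keeps the SDK from "correcting" a deliberately configured FIPS endpoint to a non-FIPS regional one. An illustrative client configuration (endpoint shown for example purposes only):

    s3 = Aws::S3::Client.new(
      region: 'us-gov-west-1',
      endpoint: 'https://s3-fips.us-gov-west-1.amazonaws.com'
    )
    # region-mismatch retries and 307 redirects now leave this host alone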
@@ -98,6 +98,7 @@ module Aws
       req.handlers.remove(Seahorse::Client::Plugins::ContentLength::Handler)

       signer = build_signer(req.context.config)
+      req.context[:presigned_url] = true

       req.handle(step: :send) do |context|
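
Flagging the request context with :presigned_url lets later handlers recognize that the request will be signed but never actually sent by the SDK. Generating a URL is unchanged, e.g. with placeholder bucket and key:

    signer = Aws::S3::Presigner.new
    url = signer.presigned_url(:get_object,
      bucket: 'my-bucket', key: 'my-key', expires_in: 900)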