aws-sdk-s3 1.0.0.rc3 → 1.0.0.rc4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 47082bcd6514ee19071b0e65b3614849bc841028
4
- data.tar.gz: d0a6a1b92982a0cfb6a1df90d2abf54d9870a947
3
+ metadata.gz: 5fbb74b560c218d4dda7e2f060f0b87dd468e095
4
+ data.tar.gz: 6e888b327c450752ccdde576d35a9567f854cd44
5
5
  SHA512:
6
- metadata.gz: 321f5f82ee5ec0c7537a8578e6f26c1a22c17722c3debd411ca3f7f34ed66a8792147960c8e0499285c2b48b290f6f12d487a1aa1d2ea5e56b57b5882caaa2b0
7
- data.tar.gz: c3fbf872aa1413afcfc62116b0aed6c6cfaa703ea0d2130dcb50896b905a84731a6a580223a79830d42d1a8452319aea8dc47ff010814ba81f332041cbdca599
6
+ metadata.gz: e2ee1313f3356b25e7ea6527b8c1dbf0b6e16db9e19181728852fa7738887af59a313b09c637a621923c29ce190291dea6290ea57af43e0bde868f95b7e64d8b
7
+ data.tar.gz: bf7c18e08d97a1f64ca0d432c0fa6ca55d18d198bd8740c722dbd7320e95695725ad73b6b8a7640878f1a572eedf640afcec625d9a956bbc8d689c15289c803d
@@ -61,6 +61,6 @@ require_relative 'aws-sdk-s3/customizations'
61
61
  # @service
62
62
  module Aws::S3
63
63
 
64
- GEM_VERSION = '1.0.0.rc2'
64
+ GEM_VERSION = '1.0.0.rc4'
65
65
 
66
66
  end
@@ -18,6 +18,7 @@ require 'aws-sdk-core/plugins/regional_endpoint.rb'
18
18
  require 'aws-sdk-core/plugins/response_paging.rb'
19
19
  require 'aws-sdk-core/plugins/stub_responses.rb'
20
20
  require 'aws-sdk-core/plugins/idempotency_token.rb'
21
+ require 'aws-sdk-core/plugins/jsonvalue_converter.rb'
21
22
  require 'aws-sdk-core/plugins/protocols/rest_xml.rb'
22
23
  require 'aws-sdk-s3/plugins/accelerate.rb'
23
24
  require 'aws-sdk-s3/plugins/dualstack.rb'
@@ -57,6 +58,7 @@ module Aws::S3
57
58
  add_plugin(Aws::Plugins::ResponsePaging)
58
59
  add_plugin(Aws::Plugins::StubResponses)
59
60
  add_plugin(Aws::Plugins::IdempotencyToken)
61
+ add_plugin(Aws::Plugins::JsonvalueConverter)
60
62
  add_plugin(Aws::Plugins::Protocols::RestXml)
61
63
  add_plugin(Aws::S3::Plugins::Accelerate)
62
64
  add_plugin(Aws::S3::Plugins::Dualstack)
@@ -4361,7 +4363,7 @@ module Aws::S3
4361
4363
  params: params,
4362
4364
  config: config)
4363
4365
  context[:gem_name] = 'aws-sdk-s3'
4364
- context[:gem_version] = '1.0.0.rc2'
4366
+ context[:gem_version] = '1.0.0.rc4'
4365
4367
  Seahorse::Client::Request.new(handlers, context)
4366
4368
  end
4367
4369
 
@@ -3,6 +3,7 @@ require 'aws-sdk-s3/bucket_region_cache'
3
3
  require 'aws-sdk-s3/encryption'
4
4
  require 'aws-sdk-s3/file_part'
5
5
  require 'aws-sdk-s3/file_uploader'
6
+ require 'aws-sdk-s3/file_downloader'
6
7
  require 'aws-sdk-s3/legacy_signer'
7
8
  require 'aws-sdk-s3/multipart_file_uploader'
8
9
  require 'aws-sdk-s3/multipart_upload_error'
@@ -253,6 +253,38 @@ module Aws
253
253
  true
254
254
  end
255
255
 
256
# Downloads this object from S3 and writes it to a path on disk.
#
#     # objects smaller than 5MB are fetched with a single GET request
#     obj.download_file('/path/to/file')
#
# Objects of 5MB or more are fetched with several requests:
#
#     # large objects are split into pieces and the pieces
#     # are downloaded in parallel
#     obj.download_file('/path/to/very_large_file')
#
# @param [String] destination Path on disk to write the object to.
#
# @option options [String] :mode ("auto") One of `auto`,
#   `single_request` or `get_range`.
#
#   * `single_request` downloads the object with exactly one GET
#     request.
#   * `get_range` downloads the object in byte ranges whose size is
#     given by `:chunk_size`.
#   * `auto` chooses between a single request and a multipart
#     download based on the size of the object.
#
# @option options [Integer] :chunk_size Size in bytes of each range.
#   Required in `get_range` mode.
#
# @option options [Integer] :thread_count (10) Number of threads used
#   for a multipart download.
#
# @return [Boolean] Returns `true` when the file is downloaded
#   without any errors.
def download_file(destination, options = {})
  transfer = FileDownloader.new(client: client)
  request = options.merge(bucket: bucket_name, key: key)
  transfer.download(destination, request)
  true
end
256
288
  end
257
289
  end
258
290
  end
@@ -60,6 +60,14 @@ module Aws
60
60
  object.upload_file(source, options)
61
61
  end
62
62
 
63
# Downloads the underlying object to `destination` by delegating to
# {Object#download_file}.
#
# @param (see Object#download_file)
# @option (see Object#download_file)
# @return (see Object#download_file)
# @see Object#download_file
def download_file(destination, options = {})
  object.download_file(destination, options)
end
70
+
63
71
  end
64
72
  end
65
73
  end
@@ -0,0 +1,173 @@
1
+ require 'pathname'
2
+ require 'thread'
3
+ require 'set'
4
+ require 'tmpdir'
5
+
6
module Aws
  module S3
    # Downloads a single S3 object to a local path, either with one GET
    # request or by fetching byte ranges / parts concurrently and joining
    # them afterwards.
    #
    # @api private
    class FileDownloader

      # Objects smaller than this are always fetched with a single request;
      # it is also the smallest range size chosen automatically.
      MIN_CHUNK_SIZE = 5 * 1024 * 1024

      # Upper bound on the number of automatically computed ranges.
      MAX_PARTS = 10_000

      # Default number of concurrent requests per batch.
      THREAD_COUNT = 10

      # @option options [Client] :client Client used for all requests.
      #   A new {Client} is built when omitted.
      def initialize(options = {})
        @client = options[:client] || Client.new
      end

      # @return [Client]
      attr_reader :client

      # Downloads the object identified by `:bucket` and `:key` to
      # `destination`.
      #
      # @param [String] destination Path of the file to write.
      # @option options [String, Symbol] :mode ("auto") `auto`,
      #   `single_request` or `get_range`.
      # @option options [Integer] :chunk_size Range size in bytes; required
      #   in `get_range` mode.
      # @option options [Integer] :thread_count (THREAD_COUNT) Number of
      #   requests issued concurrently.
      # @option options [String] :bucket
      # @option options [String] :key
      # @raise [ArgumentError] for an unknown mode, a missing or
      #   non-positive `:chunk_size`, or a `:chunk_size` larger than the
      #   object.
      def download(destination, options = {})
        @path = destination
        # The error message below advertises symbols, so accept both
        # symbols and strings.
        @mode = (options[:mode] || 'auto').to_s
        @thread_count = options[:thread_count] || THREAD_COUNT
        @chunk_size = options[:chunk_size]
        @bucket = options[:bucket]
        @key = options[:key]

        case @mode
        when 'auto' then multipart_download
        when 'single_request' then single_request
        when 'get_range' then ranged_download
        else
          msg = "Invalid mode #{@mode} provided, "\
            "mode should be :single_request, :get_range or :auto"
          raise ArgumentError, msg
        end
      end

      private

      # `get_range` mode: split the object into ranges of `:chunk_size`.
      def ranged_download
        unless @chunk_size
          msg = "In :get_range mode, :chunk_size must be provided"
          raise ArgumentError, msg
        end
        resp = @client.head_object(bucket: @bucket, key: @key)
        multithreaded_get_by_ranges(construct_chunks(resp.content_length))
      end

      # `auto` mode: pick single request, ranges or parts from the
      # object's size and its reported parts count.
      def multipart_download
        resp = @client.head_object(bucket: @bucket, key: @key, part_number: 1)
        count = resp.parts_count
        if count.nil? || count <= 1
          size = resp.content_length
          return single_request if size < MIN_CHUNK_SIZE
          multithreaded_get_by_ranges(construct_chunks(size))
        else
          # The first HEAD was issued with part_number, so ask again
          # without it before using the content length as the total size.
          size = @client.head_object(bucket: @bucket, key: @key).content_length
          return single_request if size < MIN_CHUNK_SIZE
          compute_mode(size, count)
        end
      end

      # Downloads by parts unless the range size in effect is smaller than
      # the average part size, in which case ranges are used.
      def compute_mode(file_size, count)
        chunk_size = compute_chunk(file_size)
        part_size = (file_size.to_f / count.to_f).ceil
        if chunk_size < part_size
          multithreaded_get_by_ranges(construct_chunks(file_size))
        else
          multithreaded_get_by_parts(count)
        end
      end

      # Builds the list of `Range` header values covering bytes
      # 0..file_size-1. Both ends of an HTTP byte range are inclusive, so
      # each range ends one byte before the next one starts.
      def construct_chunks(file_size)
        chunk_size = compute_chunk(file_size)
        chunks = []
        offset = 0
        while offset < file_size
          last_byte = [offset + chunk_size, file_size].min - 1
          chunks << "bytes=#{offset}-#{last_byte}"
          offset = last_byte + 1
        end
        chunks
      end

      # Returns the range size to use: the caller's `:chunk_size` when
      # given, otherwise a size that keeps the range count within
      # MAX_PARTS and is never below MIN_CHUNK_SIZE.
      def compute_chunk(file_size)
        unless @chunk_size
          return [(file_size.to_f / MAX_PARTS).ceil, MIN_CHUNK_SIZE].max.to_i
        end
        unless @chunk_size.is_a?(Integer) && @chunk_size > 0
          raise ArgumentError, ":chunk_size must be a positive Integer."
        end
        if @chunk_size > file_size
          raise ArgumentError, ":chunk_size shouldn't exceed total file size."
        end
        @chunk_size
      end

      # Writes the part files, in the given order, into the destination.
      def concatenate_parts(fileparts)
        File.open(@path, 'wb') do |output|
          fileparts.each { |part| IO.copy_stream(part, output) }
        end
      end

      def multithreaded_get_by_ranges(chunks)
        thread_batches(chunks, 'range')
      end

      def multithreaded_get_by_parts(parts)
        thread_batches(parts, 'part_number')
      end

      # Fetches every chunk into its own temporary file, at most
      # `@thread_count` at a time, then joins the files into the
      # destination.
      #
      # @param [Array<String>, Integer] chunks Range header values, or the
      #   number of parts when `param` is 'part_number'.
      # @param [String] param 'range' or 'part_number'; the name of the
      #   get_object parameter that receives each chunk.
      def thread_batches(chunks, param)
        chunks = (1..chunks).to_a if param == 'part_number'
        # A private directory per download keeps concurrent downloads from
        # overwriting each other's part files; the block form removes the
        # directory and its contents on success and on failure.
        Dir.mktmpdir('aws-sdk-s3-download') do |dir|
          parts = chunks.each_with_index.map do |chunk, index|
            [chunk, File.join(dir, "part-#{index}")]
          end
          parts.each_slice(@thread_count) do |batch|
            download_batch(batch, param)
          end
          concatenate_parts(parts.map(&:last))
        end
      end

      # Runs one get_object call per (chunk, file) pair concurrently and
      # waits for all of them. Every thread is joined before the first
      # failure is re-raised, so no request is still writing when the
      # temporary directory is removed.
      def download_batch(batch, param)
        threads = batch.map do |chunk, file|
          Thread.new do
            @client.get_object(
              :bucket => @bucket,
              :key => @key,
              param.to_sym => chunk,
              :response_target => file
            )
          end
        end
        first_error = nil
        threads.each do |thread|
          begin
            thread.join
          rescue StandardError => error
            first_error ||= error
          end
        end
        raise first_error if first_error
      end

      # Fetches the whole object with one GET straight into the
      # destination.
      def single_request
        @client.get_object(
          bucket: @bucket, key: @key, response_target: @path
        )
      end
    end
  end
end
@@ -1,6 +1,7 @@
1
1
  require 'set'
2
2
  require 'time'
3
3
  require 'openssl'
4
+ require 'cgi/util'
4
5
  require 'webrick/httputils'
5
6
  require 'aws-sdk-core/query'
6
7
 
@@ -1,4 +1,5 @@
1
1
  require 'thread'
2
+ require 'cgi/util'
2
3
 
3
4
  module Aws
4
5
  module S3
@@ -1,4 +1,5 @@
1
1
  require 'uri'
2
+ require 'cgi/util'
2
3
 
3
4
  module Aws
4
5
  module S3
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aws-sdk-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0.rc3
4
+ version: 1.0.0.rc4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Amazon Web Services
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-09 00:00:00.000000000 Z
11
+ date: 2017-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-core
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - '='
32
32
  - !ruby/object:Gem::Version
33
- version: 1.0.0.rc3
33
+ version: 1.0.0.rc4
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - '='
39
39
  - !ruby/object:Gem::Version
40
- version: 1.0.0.rc3
40
+ version: 1.0.0.rc4
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: aws-sigv4
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -96,6 +96,7 @@ files:
96
96
  - lib/aws-sdk-s3/encryption/materials.rb
97
97
  - lib/aws-sdk-s3/encryption/utils.rb
98
98
  - lib/aws-sdk-s3/errors.rb
99
+ - lib/aws-sdk-s3/file_downloader.rb
99
100
  - lib/aws-sdk-s3/file_part.rb
100
101
  - lib/aws-sdk-s3/file_uploader.rb
101
102
  - lib/aws-sdk-s3/legacy_signer.rb