aws-sdk-s3 1.0.0.rc3 → 1.0.0.rc4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/aws-sdk-s3.rb +1 -1
- data/lib/aws-sdk-s3/client.rb +3 -1
- data/lib/aws-sdk-s3/customizations.rb +1 -0
- data/lib/aws-sdk-s3/customizations/object.rb +32 -0
- data/lib/aws-sdk-s3/customizations/object_summary.rb +8 -0
- data/lib/aws-sdk-s3/file_downloader.rb +173 -0
- data/lib/aws-sdk-s3/legacy_signer.rb +1 -0
- data/lib/aws-sdk-s3/object_multipart_copier.rb +1 -0
- data/lib/aws-sdk-s3/plugins/url_encoded_keys.rb +1 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5fbb74b560c218d4dda7e2f060f0b87dd468e095
|
4
|
+
data.tar.gz: 6e888b327c450752ccdde576d35a9567f854cd44
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2ee1313f3356b25e7ea6527b8c1dbf0b6e16db9e19181728852fa7738887af59a313b09c637a621923c29ce190291dea6290ea57af43e0bde868f95b7e64d8b
|
7
|
+
data.tar.gz: bf7c18e08d97a1f64ca0d432c0fa6ca55d18d198bd8740c722dbd7320e95695725ad73b6b8a7640878f1a572eedf640afcec625d9a956bbc8d689c15289c803d
|
data/lib/aws-sdk-s3.rb
CHANGED
data/lib/aws-sdk-s3/client.rb
CHANGED
@@ -18,6 +18,7 @@ require 'aws-sdk-core/plugins/regional_endpoint.rb'
|
|
18
18
|
require 'aws-sdk-core/plugins/response_paging.rb'
|
19
19
|
require 'aws-sdk-core/plugins/stub_responses.rb'
|
20
20
|
require 'aws-sdk-core/plugins/idempotency_token.rb'
|
21
|
+
require 'aws-sdk-core/plugins/jsonvalue_converter.rb'
|
21
22
|
require 'aws-sdk-core/plugins/protocols/rest_xml.rb'
|
22
23
|
require 'aws-sdk-s3/plugins/accelerate.rb'
|
23
24
|
require 'aws-sdk-s3/plugins/dualstack.rb'
|
@@ -57,6 +58,7 @@ module Aws::S3
|
|
57
58
|
add_plugin(Aws::Plugins::ResponsePaging)
|
58
59
|
add_plugin(Aws::Plugins::StubResponses)
|
59
60
|
add_plugin(Aws::Plugins::IdempotencyToken)
|
61
|
+
add_plugin(Aws::Plugins::JsonvalueConverter)
|
60
62
|
add_plugin(Aws::Plugins::Protocols::RestXml)
|
61
63
|
add_plugin(Aws::S3::Plugins::Accelerate)
|
62
64
|
add_plugin(Aws::S3::Plugins::Dualstack)
|
@@ -4361,7 +4363,7 @@ module Aws::S3
|
|
4361
4363
|
params: params,
|
4362
4364
|
config: config)
|
4363
4365
|
context[:gem_name] = 'aws-sdk-s3'
|
4364
|
-
context[:gem_version] = '1.0.0.rc3'
|
4366
|
+
context[:gem_version] = '1.0.0.rc4'
|
4365
4367
|
Seahorse::Client::Request.new(handlers, context)
|
4366
4368
|
end
|
4367
4369
|
|
@@ -3,6 +3,7 @@ require 'aws-sdk-s3/bucket_region_cache'
|
|
3
3
|
require 'aws-sdk-s3/encryption'
|
4
4
|
require 'aws-sdk-s3/file_part'
|
5
5
|
require 'aws-sdk-s3/file_uploader'
|
6
|
+
require 'aws-sdk-s3/file_downloader'
|
6
7
|
require 'aws-sdk-s3/legacy_signer'
|
7
8
|
require 'aws-sdk-s3/multipart_file_uploader'
|
8
9
|
require 'aws-sdk-s3/multipart_upload_error'
|
@@ -253,6 +253,38 @@ module Aws
|
|
253
253
|
true
|
254
254
|
end
|
255
255
|
|
256
|
+
# Downloads this S3 object to a file on the local disk.
#
#     # small files (< 5MB) are downloaded in a single API call
#     obj.download_file('/path/to/file')
#
# Files larger than 5MB are downloaded using the multipart method:
#
#     # large files are split into parts
#     # and the parts are downloaded in parallel
#     obj.download_file('/path/to/very_large_file')
#
# @param [String] destination Local path the object is written to.
#
# @option options [String] mode One of `auto`, `single_request` or
#   `get_range`. `single_request` forces the whole download to be made
#   with exactly one GET request. `get_range` downloads the object in
#   ranges whose size is given by `chunk_size`. When omitted, `auto` is
#   used, which performs a multipart download.
#
# @option options [Integer] chunk_size Size of each range in bytes;
#   required in `get_range` mode.
#
# @option options [Integer] thread_count Number of threads used for a
#   multipart download. Defaults to 10 when not provided.
#
# @return [Boolean] Returns `true` when the file is downloaded
#   without any errors.
def download_file(destination, options = {})
  transfer = FileDownloader.new(client: client)
  # The downloader identifies the object through the :bucket and :key options.
  request_options = options.merge(bucket: bucket_name, key: key)
  transfer.download(destination, request_options)
  true
end
|
256
288
|
end
|
257
289
|
end
|
258
290
|
end
|
@@ -60,6 +60,14 @@ module Aws
|
|
60
60
|
object.upload_file(source, options)
|
61
61
|
end
|
62
62
|
|
63
|
+
# Downloads the object described by this summary to a local file by
# delegating to {Object#download_file}.
#
# @param (see Object#download_file)
# @options (see Object#download_file)
# @return (see Object#download_file)
# @see Object#download_file
def download_file(destination, options = {})
  target = object
  target.download_file(destination, options)
end
|
70
|
+
|
63
71
|
end
|
64
72
|
end
|
65
73
|
end
|
@@ -0,0 +1,173 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'thread'
|
3
|
+
require 'set'
|
4
|
+
require 'tmpdir'
|
5
|
+
|
6
|
+
module Aws
|
7
|
+
module S3
|
8
|
+
# @api private
|
9
|
+
class FileDownloader

  # Objects smaller than this (in bytes) are fetched with a single GET in
  # `auto` mode; it is also the smallest automatically chosen range size.
  MIN_CHUNK_SIZE = 5 * 1024 * 1024
  # Upper bound on the number of ranges an automatically sized download
  # is split into.
  MAX_PARTS = 10_000
  # Default number of concurrent download threads per batch.
  THREAD_COUNT = 10

  # @option options [Client] :client Client used for all requests; a new
  #   {Client} is constructed when omitted.
  def initialize(options = {})
    @client = options[:client] || Client.new
  end

  # @return [Client]
  attr_reader :client

  # Downloads an object to `destination`.
  #
  # @param [String] destination Local path the object is written to.
  # @option options [String, Symbol] :mode ("auto") `auto`,
  #   `single_request` or `get_range`.
  # @option options [Integer] :chunk_size Range size in bytes; required in
  #   `get_range` mode.
  # @option options [Integer] :thread_count (THREAD_COUNT) Number of
  #   requests issued concurrently.
  # @option options [String] :bucket
  # @option options [String] :key
  # @raise [ArgumentError] for an unknown mode, a missing or non-positive
  #   `:chunk_size`, or a `:chunk_size` larger than the object.
  def download(destination, options = {})
    @path = destination
    # The error message below advertises symbols, so accept both
    # symbols and strings.
    @mode = (options[:mode] || "auto").to_s
    @thread_count = options[:thread_count] || THREAD_COUNT
    @chunk_size = options[:chunk_size]
    @bucket = options[:bucket]
    @key = options[:key]

    case @mode
    when "auto" then multipart_download
    when "single_request" then single_request
    when "get_range"
      unless @chunk_size
        msg = "In :get_range mode, :chunk_size must be provided"
        raise ArgumentError, msg
      end
      resp = @client.head_object(bucket: @bucket, key: @key)
      multithreaded_get_by_ranges(construct_chunks(resp.content_length))
    else
      msg = "Invalid mode #{@mode} provided, "\
        "mode should be :single_request, :get_range or :auto"
      raise ArgumentError, msg
    end
  end

  private

  # `auto` mode: picks single request, ranged GETs or part-number GETs
  # based on the object's size and its reported part count.
  def multipart_download
    resp = @client.head_object(bucket: @bucket, key: @key, part_number: 1)
    count = resp.parts_count
    if count.nil? || count <= 1
      if resp.content_length < MIN_CHUNK_SIZE
        single_request
      else
        multithreaded_get_by_ranges(construct_chunks(resp.content_length))
      end
    else
      # partNumber is an option; repeat the HEAD without it before
      # reading content_length for the size decision.
      resp = @client.head_object(bucket: @bucket, key: @key)
      if resp.content_length < MIN_CHUNK_SIZE
        single_request
      else
        compute_mode(resp.content_length, count)
      end
    end
  end

  # Chooses ranged GETs when the computed chunk size is smaller than the
  # average part size, otherwise downloads by part number.
  def compute_mode(file_size, count)
    chunk_size = compute_chunk(file_size)
    part_size = (file_size.to_f / count.to_f).ceil
    if chunk_size < part_size
      multithreaded_get_by_ranges(construct_chunks(file_size))
    else
      multithreaded_get_by_parts(count)
    end
  end

  # Splits `file_size` bytes into consecutive `bytes=first-last` range
  # strings. Ranges are inclusive, so each one ends at `start + size - 1`
  # and the final range ends at `file_size - 1`; no range ever starts at
  # or beyond the end of the object.
  #
  # @param [Integer] file_size
  # @return [Array<String>]
  def construct_chunks(file_size)
    size = compute_chunk(file_size)
    chunks = []
    offset = 0
    while offset < file_size
      stop = [offset + size, file_size].min
      chunks << "bytes=#{offset}-#{stop - 1}"
      offset = stop
    end
    chunks
  end

  # Returns the range size in bytes: the caller supplied `:chunk_size`, or
  # the larger of MIN_CHUNK_SIZE and `file_size / MAX_PARTS` rounded up.
  #
  # @raise [ArgumentError] when `:chunk_size` is not positive or exceeds
  #   `file_size`.
  def compute_chunk(file_size)
    unless @chunk_size
      return [(file_size.to_f / MAX_PARTS).ceil, MIN_CHUNK_SIZE].max.to_i
    end
    if @chunk_size > file_size
      raise ArgumentError, ":chunk_size shouldn't exceed total file size."
    end
    # A non-positive size would never advance the offset in construct_chunks.
    unless @chunk_size > 0
      raise ArgumentError, ":chunk_size must be greater than zero."
    end
    @chunk_size
  end

  # Orders part file paths by the integer following the last `=` in the
  # path (the range start, or the part number).
  def sort_files(files)
    files.sort_by { |file| file[/([^\=]+)$/].split('-')[0].to_i }
  end

  # Writes all part files, in order, into the destination path.
  def concatenate_parts(fileparts)
    File.open(@path, 'wb') do |output|
      sort_files(fileparts).each { |part| IO.copy_stream(part, output) }
    end
  end

  # Removes whichever temporary part files exist.
  def clean_up_parts(parts)
    parts.each do |filename|
      # File.exists? was removed in Ruby 3.2.
      File.unlink(filename) if File.exist?(filename)
    end
  end

  # Groups the chunks (range strings, or a part count when mode is
  # 'part_number') into batches of at most `@thread_count`, each batch
  # mapping a chunk to the temporary file that receives it.
  #
  # NOTE(review): part files use fixed names directly inside Dir.tmpdir,
  # so two concurrent downloads producing the same ranges would appear to
  # share files -- confirm whether that needs isolating.
  def file_batches(chunks, mode)
    dir = Dir.tmpdir
    chunks = (1..chunks) if mode.eql? 'part_number'
    chunks.each_slice(@thread_count).map do |slice|
      map_files(slice, dir, mode)
    end
  end

  # Maps each chunk of a slice to its temporary file path.
  def map_files(slice, dir, mode)
    case mode
    when 'range'
      slice.each_with_object({}) do |chunk, files|
        files[chunk] = File.join(dir, chunk)
      end
    when 'part_number'
      slice.each_with_object({}) do |part, files|
        files[part] = File.join(dir, "part_number=#{part}")
      end
    end
  end

  def multithreaded_get_by_ranges(chunks)
    thread_batches(chunks, 'range')
  end

  def multithreaded_get_by_parts(parts)
    thread_batches(parts, 'part_number')
  end

  # Downloads every batch with one thread per chunk, waits for each batch
  # to finish, then stitches the part files into the destination. The part
  # files are removed afterwards, including when an error is raised.
  #
  # @param [String] param 'range' or 'part_number'; also used as the
  #   get_object parameter name carrying the chunk.
  def thread_batches(chunks, param)
    batches = file_batches(chunks, param)
    parts = batches.flat_map(&:values)
    begin
      batches.each do |batch|
        threads = batch.map do |chunk, file|
          Thread.new do
            @client.get_object(
              :bucket => @bucket,
              :key => @key,
              param.to_sym => chunk,
              :response_target => file
            )
          end
        end
        # join re-raises an exception raised inside a worker thread.
        threads.each(&:join)
      end
      concatenate_parts(parts)
    ensure
      clean_up_parts(parts)
    end
  end

  # Fetches the whole object with one GET written directly to the
  # destination path.
  def single_request
    @client.get_object(
      bucket: @bucket, key: @key, response_target: @path
    )
  end
end
|
172
|
+
end
|
173
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aws-sdk-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0.rc3
|
4
|
+
version: 1.0.0.rc4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Amazon Web Services
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-core
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.0.0.
|
33
|
+
version: 1.0.0.rc4
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.0.0.
|
40
|
+
version: 1.0.0.rc4
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: aws-sigv4
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -96,6 +96,7 @@ files:
|
|
96
96
|
- lib/aws-sdk-s3/encryption/materials.rb
|
97
97
|
- lib/aws-sdk-s3/encryption/utils.rb
|
98
98
|
- lib/aws-sdk-s3/errors.rb
|
99
|
+
- lib/aws-sdk-s3/file_downloader.rb
|
99
100
|
- lib/aws-sdk-s3/file_part.rb
|
100
101
|
- lib/aws-sdk-s3/file_uploader.rb
|
101
102
|
- lib/aws-sdk-s3/legacy_signer.rb
|