aws-sdk-s3 1.0.0.rc3 → 1.0.0.rc4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/aws-sdk-s3.rb +1 -1
- data/lib/aws-sdk-s3/client.rb +3 -1
- data/lib/aws-sdk-s3/customizations.rb +1 -0
- data/lib/aws-sdk-s3/customizations/object.rb +32 -0
- data/lib/aws-sdk-s3/customizations/object_summary.rb +8 -0
- data/lib/aws-sdk-s3/file_downloader.rb +173 -0
- data/lib/aws-sdk-s3/legacy_signer.rb +1 -0
- data/lib/aws-sdk-s3/object_multipart_copier.rb +1 -0
- data/lib/aws-sdk-s3/plugins/url_encoded_keys.rb +1 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5fbb74b560c218d4dda7e2f060f0b87dd468e095
|
4
|
+
data.tar.gz: 6e888b327c450752ccdde576d35a9567f854cd44
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2ee1313f3356b25e7ea6527b8c1dbf0b6e16db9e19181728852fa7738887af59a313b09c637a621923c29ce190291dea6290ea57af43e0bde868f95b7e64d8b
|
7
|
+
data.tar.gz: bf7c18e08d97a1f64ca0d432c0fa6ca55d18d198bd8740c722dbd7320e95695725ad73b6b8a7640878f1a572eedf640afcec625d9a956bbc8d689c15289c803d
|
data/lib/aws-sdk-s3.rb
CHANGED
data/lib/aws-sdk-s3/client.rb
CHANGED
@@ -18,6 +18,7 @@ require 'aws-sdk-core/plugins/regional_endpoint.rb'
|
|
18
18
|
require 'aws-sdk-core/plugins/response_paging.rb'
|
19
19
|
require 'aws-sdk-core/plugins/stub_responses.rb'
|
20
20
|
require 'aws-sdk-core/plugins/idempotency_token.rb'
|
21
|
+
require 'aws-sdk-core/plugins/jsonvalue_converter.rb'
|
21
22
|
require 'aws-sdk-core/plugins/protocols/rest_xml.rb'
|
22
23
|
require 'aws-sdk-s3/plugins/accelerate.rb'
|
23
24
|
require 'aws-sdk-s3/plugins/dualstack.rb'
|
@@ -57,6 +58,7 @@ module Aws::S3
|
|
57
58
|
add_plugin(Aws::Plugins::ResponsePaging)
|
58
59
|
add_plugin(Aws::Plugins::StubResponses)
|
59
60
|
add_plugin(Aws::Plugins::IdempotencyToken)
|
61
|
+
add_plugin(Aws::Plugins::JsonvalueConverter)
|
60
62
|
add_plugin(Aws::Plugins::Protocols::RestXml)
|
61
63
|
add_plugin(Aws::S3::Plugins::Accelerate)
|
62
64
|
add_plugin(Aws::S3::Plugins::Dualstack)
|
@@ -4361,7 +4363,7 @@ module Aws::S3
|
|
4361
4363
|
params: params,
|
4362
4364
|
config: config)
|
4363
4365
|
context[:gem_name] = 'aws-sdk-s3'
|
4364
|
-
context[:gem_version] = '1.0.0.rc3'
|
4366
|
+
context[:gem_version] = '1.0.0.rc4'
|
4365
4367
|
Seahorse::Client::Request.new(handlers, context)
|
4366
4368
|
end
|
4367
4369
|
|
@@ -3,6 +3,7 @@ require 'aws-sdk-s3/bucket_region_cache'
|
|
3
3
|
require 'aws-sdk-s3/encryption'
|
4
4
|
require 'aws-sdk-s3/file_part'
|
5
5
|
require 'aws-sdk-s3/file_uploader'
|
6
|
+
require 'aws-sdk-s3/file_downloader'
|
6
7
|
require 'aws-sdk-s3/legacy_signer'
|
7
8
|
require 'aws-sdk-s3/multipart_file_uploader'
|
8
9
|
require 'aws-sdk-s3/multipart_upload_error'
|
@@ -253,6 +253,38 @@ module Aws
|
|
253
253
|
true
|
254
254
|
end
|
255
255
|
|
256
|
+
# Downloads this object from S3 to a path on disk.
#
#     # objects smaller than 5MB are fetched with a single GET request
#     obj.download_file('/path/to/file')
#
#     # larger objects are split into pieces that are
#     # fetched in parallel and then joined on disk
#     obj.download_file('/path/to/very_large_file')
#
# @param [String] destination Path on disk the object is written to.
#
# @option options [String] mode (`auto`) One of `auto`, `single_request`
#   or `get_range`. `single_request` forces the download to use exactly
#   one GET request. `get_range` downloads the object in byte ranges whose
#   size is given by `chunk_size`. `auto` picks between a single request
#   and a multipart download.
#
# @option options [Integer] chunk_size Size in bytes of each range.
#   Required in `get_range` mode.
#
# @option options [Integer] thread_count (10) Number of threads used for
#   a multipart download.
#
# @return [Boolean] Returns `true` when the file is downloaded
#   without any errors.
def download_file(destination, options = {})
  transfer = FileDownloader.new(client: client)
  # The downloader receives the object's location through the options hash.
  request = options.merge(bucket: bucket_name, key: key)
  transfer.download(destination, request)
  true
end
|
256
288
|
end
|
257
289
|
end
|
258
290
|
end
|
@@ -60,6 +60,14 @@ module Aws
|
|
60
60
|
object.upload_file(source, options)
|
61
61
|
end
|
62
62
|
|
63
|
+
# Downloads the summarized object to disk by delegating to
# {Object#download_file}.
#
# @param (see Object#download_file)
# @option (see Object#download_file)
# @return (see Object#download_file)
# @see Object#download_file
def download_file(destination, options = {})
  object.download_file(destination, options)
end
|
70
|
+
|
63
71
|
end
|
64
72
|
end
|
65
73
|
end
|
@@ -0,0 +1,173 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'thread'
|
3
|
+
require 'set'
|
4
|
+
require 'tmpdir'
|
5
|
+
|
6
|
+
module Aws
  module S3
    # Downloads an S3 object to a local path, either with one GET request or
    # by fetching pieces (byte ranges or upload parts) on several threads and
    # concatenating them afterwards.
    #
    # @api private
    class FileDownloader

      # Objects smaller than this are fetched with a single request in
      # `auto` mode; it is also the smallest automatically chosen range size.
      MIN_CHUNK_SIZE = 5 * 1024 * 1024

      # Upper bound on the number of ranges when the range size is computed
      # automatically.
      MAX_PARTS = 10_000

      # Default number of pieces fetched concurrently.
      THREAD_COUNT = 10

      # @option options [Client] :client Client used for all requests.
      #   A new {Client} is built when omitted.
      def initialize(options = {})
        @client = options[:client] || Client.new
      end

      # @return [Client]
      attr_reader :client

      # Downloads the object to `destination`.
      #
      # @param [String] destination Path the object is written to.
      # @option options [String, Symbol] :mode ('auto') `auto`,
      #   `single_request` or `get_range`.
      # @option options [Integer] :chunk_size Range size in bytes; required
      #   in `get_range` mode.
      # @option options [Integer] :thread_count (THREAD_COUNT)
      # @option options [String] :bucket
      # @option options [String] :key
      # @raise [ArgumentError] for an unknown mode, a missing or non-positive
      #   `:chunk_size`, or a `:chunk_size` larger than the object.
      def download(destination, options = {})
        @path = destination
        # The error message below advertises symbols, so accept both forms.
        @mode = (options[:mode] || 'auto').to_s
        @thread_count = options[:thread_count] || THREAD_COUNT
        @chunk_size = options[:chunk_size]
        @bucket = options[:bucket]
        @key = options[:key]

        case @mode
        when 'auto' then multipart_download
        when 'single_request' then single_request
        when 'get_range'
          unless @chunk_size
            msg = "In :get_range mode, :chunk_size must be provided"
            raise ArgumentError, msg
          end
          resp = @client.head_object(bucket: @bucket, key: @key)
          multithreaded_get_by_ranges(construct_chunks(resp.content_length))
        else
          msg = "Invalid mode #{@mode} provided, "\
            "mode should be :single_request, :get_range or :auto"
          raise ArgumentError, msg
        end
      end

      private

      # `auto` mode: inspects the object with HEAD requests and picks a
      # single request, a ranged download or a per-part download.
      def multipart_download
        resp = @client.head_object(bucket: @bucket, key: @key, part_number: 1)
        count = resp.parts_count
        if count.nil? || count <= 1
          if resp.content_length < MIN_CHUNK_SIZE
            single_request
          else
            multithreaded_get_by_ranges(construct_chunks(resp.content_length))
          end
        else
          # Re-issue HEAD without part_number before reading content_length
          # (presumably the first response describes part 1 only).
          resp = @client.head_object(bucket: @bucket, key: @key)
          if resp.content_length < MIN_CHUNK_SIZE
            single_request
          else
            compute_mode(resp.content_length, count)
          end
        end
      end

      # Chooses between ranged and per-part download by comparing the range
      # size with the average size of an upload part.
      def compute_mode(file_size, count)
        chunk_size = compute_chunk(file_size)
        part_size = (file_size.to_f / count.to_f).ceil
        if chunk_size < part_size
          multithreaded_get_by_ranges(construct_chunks(file_size))
        else
          multithreaded_get_by_parts(count)
        end
      end

      # Builds the ordered list of `Range` header values covering bytes
      # `0...file_size`. Range ends are inclusive, so every range except
      # possibly the last spans exactly one chunk and the last one ends at
      # `file_size - 1`.
      #
      # @return [Array<String>]
      def construct_chunks(file_size)
        chunk = compute_chunk(file_size)
        chunks = []
        offset = 0
        while offset < file_size
          stop = [offset + chunk, file_size].min
          chunks << "bytes=#{offset}-#{stop - 1}"
          offset = stop
        end
        chunks
      end

      # Returns the range size in bytes: the caller supplied `:chunk_size`,
      # or a size that keeps the number of ranges within MAX_PARTS and is
      # never smaller than MIN_CHUNK_SIZE.
      #
      # @raise [ArgumentError] when `:chunk_size` exceeds the object size or
      #   is not positive.
      def compute_chunk(file_size)
        unless @chunk_size
          return [(file_size.to_f / MAX_PARTS).ceil, MIN_CHUNK_SIZE].max.to_i
        end
        if @chunk_size > file_size
          raise ArgumentError, ":chunk_size shouldn't exceed total file size."
        end
        # A zero or negative size would never advance through the object.
        unless @chunk_size > 0
          raise ArgumentError, ":chunk_size must be greater than zero."
        end
        @chunk_size
      end

      # Writes the part files, in the given order, to the destination path.
      def concatenate_parts(fileparts)
        File.open(@path, 'wb') do |output|
          fileparts.each { |part| IO.copy_stream(part, output) }
        end
      end

      # Groups the pieces into batches of at most `@thread_count`; each batch
      # maps a request value (range string or part number) to the temporary
      # file inside `dir` that receives it. Order is preserved.
      def file_batches(chunks, mode, dir)
        chunks = (1..chunks) if mode.eql? 'part_number'
        chunks.each_slice(@thread_count).map do |slice|
          map_files(slice, dir, mode)
        end
      end

      # Maps every piece of one batch to its temporary file path.
      def map_files(slice, dir, mode)
        case mode
        when 'range'
          slice.each_with_object({}) do |chunk, files|
            files[chunk] = File.join(dir, chunk)
          end
        when 'part_number'
          slice.each_with_object({}) do |part, files|
            files[part] = File.join(dir, "part_number=#{part}")
          end
        end
      end

      def multithreaded_get_by_ranges(chunks)
        thread_batches(chunks, 'range')
      end

      def multithreaded_get_by_parts(parts)
        thread_batches(parts, 'part_number')
      end

      # Fetches all pieces batch by batch, one thread per piece, then joins
      # them into the destination file.
      #
      # Part files live in a directory created for this download only, so
      # concurrent downloads cannot overwrite each other's parts; the
      # directory and its contents are removed when the block exits,
      # whether or not an error was raised.
      def thread_batches(chunks, param)
        Dir.mktmpdir('aws-sdk-s3-download') do |dir|
          batches = file_batches(chunks, param, dir)
          batches.each do |batch|
            threads = batch.map do |chunk, file|
              Thread.new do
                @client.get_object(
                  :bucket => @bucket,
                  :key => @key,
                  param.to_sym => chunk,
                  :response_target => file
                )
              end
            end
            join_all(threads)
          end
          # Batches and their hashes keep construction order, so the values
          # are already sorted by position in the object.
          concatenate_parts(batches.flat_map(&:values))
        end
      end

      # Waits for every thread to finish before re-raising the first
      # failure, so no thread is still writing when temporary files are
      # removed.
      def join_all(threads)
        failure = nil
        threads.each do |thread|
          begin
            thread.join
          rescue StandardError => error
            failure ||= error
          end
        end
        raise failure if failure
      end

      # Fetches the whole object with one GET request straight into the
      # destination path.
      def single_request
        @client.get_object(
          bucket: @bucket, key: @key, response_target: @path
        )
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aws-sdk-s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0.rc3
|
4
|
+
version: 1.0.0.rc4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Amazon Web Services
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: aws-sdk-core
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.0.0.
|
33
|
+
version: 1.0.0.rc4
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.0.0.
|
40
|
+
version: 1.0.0.rc4
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: aws-sigv4
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -96,6 +96,7 @@ files:
|
|
96
96
|
- lib/aws-sdk-s3/encryption/materials.rb
|
97
97
|
- lib/aws-sdk-s3/encryption/utils.rb
|
98
98
|
- lib/aws-sdk-s3/errors.rb
|
99
|
+
- lib/aws-sdk-s3/file_downloader.rb
|
99
100
|
- lib/aws-sdk-s3/file_part.rb
|
100
101
|
- lib/aws-sdk-s3/file_uploader.rb
|
101
102
|
- lib/aws-sdk-s3/legacy_signer.rb
|