aws-sdk-s3 1.0.0.rc3 → 1.0.0.rc4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 47082bcd6514ee19071b0e65b3614849bc841028
4
- data.tar.gz: d0a6a1b92982a0cfb6a1df90d2abf54d9870a947
3
+ metadata.gz: 5fbb74b560c218d4dda7e2f060f0b87dd468e095
4
+ data.tar.gz: 6e888b327c450752ccdde576d35a9567f854cd44
5
5
  SHA512:
6
- metadata.gz: 321f5f82ee5ec0c7537a8578e6f26c1a22c17722c3debd411ca3f7f34ed66a8792147960c8e0499285c2b48b290f6f12d487a1aa1d2ea5e56b57b5882caaa2b0
7
- data.tar.gz: c3fbf872aa1413afcfc62116b0aed6c6cfaa703ea0d2130dcb50896b905a84731a6a580223a79830d42d1a8452319aea8dc47ff010814ba81f332041cbdca599
6
+ metadata.gz: e2ee1313f3356b25e7ea6527b8c1dbf0b6e16db9e19181728852fa7738887af59a313b09c637a621923c29ce190291dea6290ea57af43e0bde868f95b7e64d8b
7
+ data.tar.gz: bf7c18e08d97a1f64ca0d432c0fa6ca55d18d198bd8740c722dbd7320e95695725ad73b6b8a7640878f1a572eedf640afcec625d9a956bbc8d689c15289c803d
@@ -61,6 +61,6 @@ require_relative 'aws-sdk-s3/customizations'
61
61
  # @service
62
62
  module Aws::S3
63
63
 
64
- GEM_VERSION = '1.0.0.rc2'
64
+ GEM_VERSION = '1.0.0.rc4'
65
65
 
66
66
  end
@@ -18,6 +18,7 @@ require 'aws-sdk-core/plugins/regional_endpoint.rb'
18
18
  require 'aws-sdk-core/plugins/response_paging.rb'
19
19
  require 'aws-sdk-core/plugins/stub_responses.rb'
20
20
  require 'aws-sdk-core/plugins/idempotency_token.rb'
21
+ require 'aws-sdk-core/plugins/jsonvalue_converter.rb'
21
22
  require 'aws-sdk-core/plugins/protocols/rest_xml.rb'
22
23
  require 'aws-sdk-s3/plugins/accelerate.rb'
23
24
  require 'aws-sdk-s3/plugins/dualstack.rb'
@@ -57,6 +58,7 @@ module Aws::S3
57
58
  add_plugin(Aws::Plugins::ResponsePaging)
58
59
  add_plugin(Aws::Plugins::StubResponses)
59
60
  add_plugin(Aws::Plugins::IdempotencyToken)
61
+ add_plugin(Aws::Plugins::JsonvalueConverter)
60
62
  add_plugin(Aws::Plugins::Protocols::RestXml)
61
63
  add_plugin(Aws::S3::Plugins::Accelerate)
62
64
  add_plugin(Aws::S3::Plugins::Dualstack)
@@ -4361,7 +4363,7 @@ module Aws::S3
4361
4363
  params: params,
4362
4364
  config: config)
4363
4365
  context[:gem_name] = 'aws-sdk-s3'
4364
- context[:gem_version] = '1.0.0.rc2'
4366
+ context[:gem_version] = '1.0.0.rc4'
4365
4367
  Seahorse::Client::Request.new(handlers, context)
4366
4368
  end
4367
4369
 
@@ -3,6 +3,7 @@ require 'aws-sdk-s3/bucket_region_cache'
3
3
  require 'aws-sdk-s3/encryption'
4
4
  require 'aws-sdk-s3/file_part'
5
5
  require 'aws-sdk-s3/file_uploader'
6
+ require 'aws-sdk-s3/file_downloader'
6
7
  require 'aws-sdk-s3/legacy_signer'
7
8
  require 'aws-sdk-s3/multipart_file_uploader'
8
9
  require 'aws-sdk-s3/multipart_upload_error'
@@ -253,6 +253,38 @@ module Aws
253
253
  true
254
254
  end
255
255
 
256
+ # Downloads a file in S3 to a path on disk.
257
+ #
258
+ # # small files (< 5MB) are downloaded in a single API call
259
+ # obj.download_file('/path/to/file')
260
+ #
261
+ # Files larger than 5MB are downloaded using the multipart method.
262
+ #
263
+ # # large files are split into parts
264
+ # # and the parts are downloaded in parallel
265
+ # obj.download_file('/path/to/very_large_file')
266
+ #
267
+ # @param [String] destination Where to download the file to
268
+ #
269
+ # @option options [String] mode `auto`, `single_request`, `get_range`
270
+ # `single_request` mode forces the download to be made with a single GET request,
271
+ # `get_range` mode allows the `chunk_size` parameter to be configured,
272
+ # customizing the size of each range in a multipart download.
273
+ # By default, `auto` mode is enabled, which performs a multipart download.
274
+ #
275
+ # @option options [Integer] chunk_size Size of each range in bytes; required in `get_range` mode
276
+ #
277
+ # @option options [Integer] thread_count Number of threads used in a multipart
278
+ # download; defaults to 10 if not provided
279
+ #
280
+ # @return [Boolean] Returns `true` when the file is downloaded
281
+ # without any errors.
282
def download_file(destination, options = {})
  # The object's own bucket name and key are merged over the caller's options
  # before the download is handed to a FileDownloader bound to this client.
  FileDownloader.new(client: client).download(
    destination,
    options.merge(bucket: bucket_name, key: key)
  )
  # Reaching this line means the downloader did not raise.
  true
end
256
288
  end
257
289
  end
258
290
  end
@@ -60,6 +60,14 @@ module Aws
60
60
  object.upload_file(source, options)
61
61
  end
62
62
 
63
+ # @param (see Object#download_file)
64
+ # @option (see Object#download_file)
65
+ # @return (see Object#download_file)
66
+ # @see Object#download_file
67
def download_file(destination, options = {})
  # Pure delegation: the result of the wrapped object's #download_file is
  # returned unchanged.
  target = object
  target.download_file(destination, options)
end
70
+
63
71
  end
64
72
  end
65
73
  end
@@ -0,0 +1,173 @@
1
+ require 'pathname'
2
+ require 'thread'
3
+ require 'set'
4
+ require 'tmpdir'
5
+
6
module Aws
  module S3
    # Downloads a single S3 object to a local path, either with one GET
    # request or by fetching byte ranges / parts in parallel threads into
    # temporary files that are then concatenated into the destination.
    #
    # @api private
    class FileDownloader

      # Objects smaller than this are fetched with a single GET in `auto`
      # mode; it is also the lower bound of the automatically chosen chunk size.
      MIN_CHUNK_SIZE = 5 * 1024 * 1024

      # Divisor used to derive the automatic chunk size from the object size.
      MAX_PARTS = 10_000

      # Number of concurrent download threads when :thread_count is not given.
      THREAD_COUNT = 10

      # @option options [Client] :client client used for every request;
      #   a new {Client} is built when omitted.
      def initialize(options = {})
        @client = options[:client] || Client.new
      end

      # @return [Client]
      attr_reader :client

      # Downloads the object identified by `:bucket` / `:key` to `destination`.
      #
      # @param [String] destination path of the file to write
      # @option options [String, Symbol] :mode ("auto") one of `auto`,
      #   `single_request` or `get_range`
      # @option options [Integer] :chunk_size size in bytes of each range;
      #   required in `get_range` mode
      # @option options [Integer] :thread_count (THREAD_COUNT) number of
      #   requests issued concurrently
      # @option options [String] :bucket
      # @option options [String] :key
      # @raise [ArgumentError] on an unknown mode, a missing / non-positive
      #   `:chunk_size`, or a `:chunk_size` larger than the object
      def download(destination, options = {})
        @path = destination
        # The error message below advertises symbols, so accept both
        # :get_range and "get_range".
        @mode = (options[:mode] || 'auto').to_s
        @thread_count = options[:thread_count] || THREAD_COUNT
        @chunk_size = options[:chunk_size]
        @bucket = options[:bucket]
        @key = options[:key]

        case @mode
        when 'auto' then multipart_download
        when 'single_request' then single_request
        when 'get_range' then ranged_download
        else
          msg = "Invalid mode #{@mode} provided, "\
            "mode should be :single_request, :get_range or :auto"
          raise ArgumentError, msg
        end
      end

      private

      # `get_range` mode: split the object into :chunk_size byte ranges.
      def ranged_download
        unless @chunk_size
          msg = "In :get_range mode, :chunk_size must be provided"
          raise ArgumentError, msg
        end
        resp = @client.head_object(bucket: @bucket, key: @key)
        multithreaded_get_by_ranges(construct_chunks(resp.content_length))
      end

      # `auto` mode: pick single request, ranged download or part download
      # from the object's size and its reported parts count.
      def multipart_download
        resp = @client.head_object(bucket: @bucket, key: @key, part_number: 1)
        count = resp.parts_count
        if count.nil? || count <= 1
          size = resp.content_length
          return single_request if size < MIN_CHUNK_SIZE
          multithreaded_get_by_ranges(construct_chunks(size))
        else
          # Second HEAD without part_number to read the length of the whole
          # object rather than the length returned for part 1.
          size = @client.head_object(bucket: @bucket, key: @key).content_length
          return single_request if size < MIN_CHUNK_SIZE
          compute_mode(size, count)
        end
      end

      # Uses ranges when the chunk size is smaller than the average part
      # size, otherwise downloads part by part.
      def compute_mode(file_size, count)
        chunk_size = compute_chunk(file_size)
        part_size = (file_size.to_f / count.to_f).ceil
        if chunk_size < part_size
          multithreaded_get_by_ranges(construct_chunks(file_size))
        else
          multithreaded_get_by_parts(count)
        end
      end

      # Builds the `Range` header values covering bytes 0..file_size-1.
      # HTTP byte ranges are inclusive on both ends, so a chunk starting at
      # `offset` ends at `offset + size - 1`, and no range may start at
      # `file_size`.
      #
      # @return [Array<String>] e.g. ["bytes=0-3", "bytes=4-7", "bytes=8-9"]
      def construct_chunks(file_size)
        size = compute_chunk(file_size)
        chunks = []
        offset = 0
        while offset < file_size
          stop = [offset + size, file_size].min
          chunks << "bytes=#{offset}-#{stop - 1}"
          offset = stop
        end
        chunks
      end

      # @return [Integer] the caller supplied chunk size, or the larger of
      #   MIN_CHUNK_SIZE and file_size / MAX_PARTS (rounded up)
      # @raise [ArgumentError] when :chunk_size is not positive or exceeds
      #   the object size
      def compute_chunk(file_size)
        if @chunk_size
          # A zero or negative stride would never advance through the object.
          if @chunk_size <= 0
            raise ArgumentError, ":chunk_size must be a positive integer."
          end
          if @chunk_size > file_size
            raise ArgumentError, ":chunk_size shouldn't exceed total file size."
          end
        end
        @chunk_size || [(file_size.to_f / MAX_PARTS).ceil, MIN_CHUNK_SIZE].max
      end

      # Orders part files by the number that follows the last `=` in the
      # path: the range start ("bytes=8-9" => 8) or the part number.
      def sort_files(files)
        files.sort_by { |name| name[/([^\=]+)$/].split('-')[0].to_i }
      end

      # Writes the part files, in order, into the destination path.
      def concatenate_parts(fileparts)
        File.open(@path, 'wb') do |output|
          sort_files(fileparts).each { |part| IO.copy_stream(part, output) }
        end
      end

      # Groups the work into batches of at most @thread_count items.
      #
      # @param [Array<String>, Integer] chunks range strings, or the number
      #   of parts when mode is 'part_number'
      # @return [Array<Hash>] one hash per batch mapping the range / part
      #   number to the temporary file it is downloaded into
      def file_batches(chunks, mode, dir)
        chunks = (1..chunks) if mode.eql? 'part_number'
        chunks.each_slice(@thread_count).map do |slice|
          map_files(slice, dir, mode)
        end
      end

      # Maps every range / part number of a batch to a file inside `dir`.
      def map_files(slice, dir, mode)
        slice.each_with_object({}) do |item, files|
          name = mode == 'part_number' ? "part_number=#{item}" : item
          files[item] = File.join(dir, name)
        end
      end

      def multithreaded_get_by_ranges(chunks)
        thread_batches(chunks, 'range')
      end

      def multithreaded_get_by_parts(parts)
        thread_batches(parts, 'part_number')
      end

      # Downloads every batch, then stitches the parts together.
      #
      # Parts live in a directory created for this call only, so concurrent
      # downloads never share file names; Dir.mktmpdir removes the directory
      # and its contents when the block exits, including on error.
      def thread_batches(chunks, param)
        Dir.mktmpdir('aws-sdk-s3-download') do |dir|
          batches = file_batches(chunks, param, dir)
          batches.each { |batch| download_batch(batch, param) }
          concatenate_parts(batches.flat_map(&:values))
        end
      end

      # Runs one GET per entry of the batch, each in its own thread, and
      # waits for all of them before reporting the first failure.
      def download_batch(batch, param)
        threads = batch.map do |chunk, file|
          Thread.new do
            # Failures are re-raised to the caller after the join, so the
            # per-thread report would only duplicate them.
            if Thread.current.respond_to?(:report_on_exception=)
              Thread.current.report_on_exception = false
            end
            @client.get_object(
              :bucket => @bucket,
              :key => @key,
              param.to_sym => chunk,
              :response_target => file
            )
          end
        end
        errors = threads.map { |thread| join_thread(thread) }.compact
        raise errors.first unless errors.empty?
      end

      # @return [StandardError, nil] the error that terminated the thread
      def join_thread(thread)
        thread.join
        nil
      rescue StandardError => error
        error
      end

      # Fetches the whole object with one GET straight into the destination.
      def single_request
        @client.get_object(
          bucket: @bucket, key: @key, response_target: @path
        )
      end
    end
  end
end
@@ -1,6 +1,7 @@
1
1
  require 'set'
2
2
  require 'time'
3
3
  require 'openssl'
4
+ require 'cgi/util'
4
5
  require 'webrick/httputils'
5
6
  require 'aws-sdk-core/query'
6
7
 
@@ -1,4 +1,5 @@
1
1
  require 'thread'
2
+ require 'cgi/util'
2
3
 
3
4
  module Aws
4
5
  module S3
@@ -1,4 +1,5 @@
1
1
  require 'uri'
2
+ require 'cgi/util'
2
3
 
3
4
  module Aws
4
5
  module S3
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aws-sdk-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0.rc3
4
+ version: 1.0.0.rc4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Amazon Web Services
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-03-09 00:00:00.000000000 Z
11
+ date: 2017-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-core
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - '='
32
32
  - !ruby/object:Gem::Version
33
- version: 1.0.0.rc3
33
+ version: 1.0.0.rc4
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - '='
39
39
  - !ruby/object:Gem::Version
40
- version: 1.0.0.rc3
40
+ version: 1.0.0.rc4
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: aws-sigv4
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -96,6 +96,7 @@ files:
96
96
  - lib/aws-sdk-s3/encryption/materials.rb
97
97
  - lib/aws-sdk-s3/encryption/utils.rb
98
98
  - lib/aws-sdk-s3/errors.rb
99
+ - lib/aws-sdk-s3/file_downloader.rb
99
100
  - lib/aws-sdk-s3/file_part.rb
100
101
  - lib/aws-sdk-s3/file_uploader.rb
101
102
  - lib/aws-sdk-s3/legacy_signer.rb