aws-sdk-resources 2.9.11 → 2.9.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 01a582ab6d06d39226ba147e559072d47bbab6a9
4
- data.tar.gz: a1c9de6cfe900a7eebc03729d38a3e4a768c6640
3
+ metadata.gz: 786d4d8e13a23fa96bb7023bb1ce60c3496cedd8
4
+ data.tar.gz: 0dd86067e191c9dc953655a66f94e792ed87e768
5
5
  SHA512:
6
- metadata.gz: 0e314de235b991ed501cbfd2d12f6450e74438047b618d2bcb65c6c48a54a908aecaa1e091a295e17060758abf5a02dbb5b2c236414a87444e1c26712d62b19d
7
- data.tar.gz: e0a60ca1c63c28ecf9fe56312a8078425668f77535dce21d42d2afda71a50edca8b884f9f8fa3c4b0c698d78f970248399b992ff26421917e7ea9c04a11a513d
6
+ metadata.gz: 8268d55e91878e34dff6e29bbf0daabdc0a2a275d087e27e609393ed3da91ac78a40693fcf0145fa18a373c81e88bb0107cc3a4bdc0213e28c0b884c87347fe2
7
+ data.tar.gz: 5b30960ed192163f10f0ea3c582520cebd7e912bc50cb16cf2b72ead87a8e1fff91c9b042aa68a01c3c79429fd17f205687c069d1a60b124c6bf455c6d0ba2e7
@@ -9,6 +9,7 @@ module Aws
9
9
  autoload :Encryption, 'aws-sdk-resources/services/s3/encryption'
10
10
  autoload :FilePart, 'aws-sdk-resources/services/s3/file_part'
11
11
  autoload :FileUploader, 'aws-sdk-resources/services/s3/file_uploader'
12
+ autoload :FileDownloader, 'aws-sdk-resources/services/s3/file_downloader'
12
13
  autoload :MultipartFileUploader, 'aws-sdk-resources/services/s3/multipart_file_uploader'
13
14
  autoload :MultipartUploadError, 'aws-sdk-resources/services/s3/multipart_upload_error'
14
15
  autoload :ObjectCopier, 'aws-sdk-resources/services/s3/object_copier'
@@ -0,0 +1,173 @@
1
+ require 'pathname'
2
+ require 'thread'
3
+ require 'set'
4
+ require 'tmpdir'
5
+
6
+ module Aws
7
+ module S3
8
+ # @api private
9
+ class FileDownloader
10
+
11
+ MIN_CHUNK_SIZE = 5 * 1024 * 1024
12
+ MAX_PARTS = 10_000
13
+ THREAD_COUNT = 10
14
+
15
+ def initialize(options = {})
16
+ @client = options[:client] || Client.new
17
+ end
18
+
19
+ # @return [Client]
20
+ attr_reader :client
21
+
22
+ def download(destination, options = {})
23
+ @path = destination
24
+ @mode = options[:mode] || "auto"
25
+ @thread_count = options[:thread_count] || THREAD_COUNT
26
+ @chunk_size = options[:chunk_size]
27
+ @bucket = options[:bucket]
28
+ @key = options[:key]
29
+
30
+ case @mode
31
+ when "auto" then multipart_download
32
+ when "single_request" then single_request
33
+ when "get_range"
34
+ if @chunk_size
35
+ resp = @client.head_object(bucket: @bucket, key: @key)
36
+ multithreaded_get_by_ranges(construct_chunks(resp.content_length))
37
+ else
38
+ msg = "In :get_range mode, :chunk_size must be provided"
39
+ raise ArgumentError, msg
40
+ end
41
+ else
42
+ msg = "Invalid mode #{@mode} provided, "\
43
+ "mode should be :single_request, :get_range or :auto"
44
+ raise ArgumentError, msg
45
+ end
46
+ end
47
+
48
+ private
49
+
50
+ def multipart_download
51
+ resp = @client.head_object(bucket: @bucket, key: @key, part_number: 1)
52
+ count = resp.parts_count
53
+ if count.nil? || count <= 1
54
+ resp.content_length < MIN_CHUNK_SIZE ?
55
+ single_request :
56
+ multithreaded_get_by_ranges(construct_chunks(resp.content_length))
57
+ else
58
+ # partNumber is an option
59
+ resp = @client.head_object(bucket: @bucket, key: @key)
60
+ resp.content_length < MIN_CHUNK_SIZE ?
61
+ single_request :
62
+ compute_mode(resp.content_length, count)
63
+ end
64
+ end
65
+
66
+ def compute_mode(file_size, count)
67
+ chunk_size = compute_chunk(file_size)
68
+ part_size = (file_size.to_f / count.to_f).ceil
69
+ if chunk_size < part_size
70
+ multithreaded_get_by_ranges(construct_chunks(file_size))
71
+ else
72
+ multithreaded_get_by_parts(count)
73
+ end
74
+ end
75
+
76
+ def construct_chunks(file_size)
77
+ offset = 0
78
+ default_chunk_size = compute_chunk(file_size)
79
+ chunks = []
80
+ while offset <= file_size
81
+ progress = offset + default_chunk_size
82
+ chunks << "bytes=#{offset}-#{progress < file_size ? progress : file_size}"
83
+ offset = progress + 1
84
+ end
85
+ chunks
86
+ end
87
+
88
+ def compute_chunk(file_size)
89
+ if @chunk_size && @chunk_size > file_size
90
+ raise ArgumentError, ":chunk_size shouldn't exceed total file size."
91
+ else
92
+ default_chunk_size = @chunk_size || [(file_size.to_f / MAX_PARTS).ceil, MIN_CHUNK_SIZE].max.to_i
93
+ end
94
+ end
95
+
96
+ def sort_files(files)
97
+ # sort files by range start offset or part number
98
+ files.sort do |a, b|
99
+ a[/([^\=]+)$/].split('-')[0].to_i <=> b[/([^\=]+)$/].split('-')[0].to_i
100
+ end
101
+ end
102
+
103
+ def concatenate_parts(fileparts)
104
+ File.open(@path, 'wb')do |output_path|
105
+ sort_files(fileparts).each {|part| IO.copy_stream(part, output_path)}
106
+ end
107
+ end
108
+
109
+ def clean_up_parts(parts)
110
+ parts.each do |filename|
111
+ File.unlink(filename) if File.exists?(filename)
112
+ end
113
+ end
114
+
115
+ def file_batches(chunks, mode)
116
+ batches = []
117
+ dir = Dir.tmpdir
118
+ chunks = (1..chunks) if mode.eql? 'part_number'
119
+ chunks.each_slice(@thread_count) do |slice|
120
+ batches << map_files(slice, dir, mode)
121
+ end
122
+ batches
123
+ end
124
+
125
+ def map_files(slice, dir, mode)
126
+ case mode
127
+ when 'range'
128
+ slice.inject({}) {|h, chunk| h[chunk] = File.join(dir, chunk); h}
129
+ when 'part_number'
130
+ slice.inject({}) {|h, part| h[part] = File.join(dir, "part_number=#{part}"); h}
131
+ end
132
+ end
133
+
134
+ def multithreaded_get_by_ranges(chunks)
135
+ thread_batches(chunks, 'range')
136
+ end
137
+
138
+ def multithreaded_get_by_parts(parts)
139
+ thread_batches(parts, 'part_number')
140
+ end
141
+
142
+ def thread_batches(chunks, param)
143
+ batches = file_batches(chunks, param)
144
+ parts = batches.flat_map(&:values)
145
+ begin
146
+ batches.each do |batch|
147
+ threads = []
148
+ batch.each do |chunk, file|
149
+ threads << Thread.new do
150
+ resp = @client.get_object(
151
+ :bucket => @bucket,
152
+ :key => @key,
153
+ param.to_sym => chunk,
154
+ :response_target => file
155
+ )
156
+ end
157
+ end
158
+ threads.each(&:join)
159
+ end
160
+ concatenate_parts(parts)
161
+ ensure
162
+ clean_up_parts(parts)
163
+ end
164
+ end
165
+
166
+ def single_request
167
+ @client.get_object(
168
+ bucket: @bucket, key: @key, response_target: @path
169
+ )
170
+ end
171
+ end
172
+ end
173
+ end
@@ -252,7 +252,39 @@ module Aws
252
252
  uploader.upload(source, uploading_options.merge(bucket: bucket_name, key: key))
253
253
  true
254
254
  end
255
-
255
+
256
+ # Downloads a file in S3 to a path on disk.
257
+ #
258
+ # # small files (< 5MB) are downloaded in a single API call
259
+ # obj.download_file('/path/to/file')
260
+ #
261
+ # Files larger than 5MB are downloaded using multipart method
262
+ #
263
+ # # large files are split into parts
264
+ # # and the parts are downloaded in parallel
265
+ # obj.download_file('/path/to/very_large_file')
266
+ #
267
+ # @param [String] destination Where to download the file to
268
+ #
269
+ # @option options [String] mode `auto`, `single_request`, `get_range`
270
+ # `single_request` mode forces the download to use only 1 GET request,
271
+ # `get_range` mode allows `chunk_size` parameter to be configured in
272
+ # customizing each range size in multipart_download,
273
+ # By default, `auto` mode is enabled, which performs multipart_download
274
+ #
275
+ # @option options [Integer] chunk_size required in `get_range` mode
276
+ #
277
+ # @option options [Integer] thread_count Customize threads used in multipart
278
+ # download, if not provided, 10 is default value
279
+ #
280
+ # @return [Boolean] Returns `true` when the file is downloaded
281
+ # without any errors.
282
+ def download_file(destination, options = {})
283
+ downloader = FileDownloader.new(client: client)
284
+ downloader.download(
285
+ destination, options.merge(bucket: bucket_name, key: key))
286
+ true
287
+ end
256
288
  end
257
289
  end
258
290
  end
@@ -60,6 +60,14 @@ module Aws
60
60
  object.upload_file(source, options)
61
61
  end
62
62
 
63
+ # @param (see Object#download_file)
64
+ # @options (see Object#download_file)
65
+ # @return (see Object#download_file)
66
+ # @see Object#download_file
67
+ def download_file(destination, options = {})
68
+ object.download_file(destination, options)
69
+ end
70
+
63
71
  end
64
72
  end
65
73
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aws-sdk-resources
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.9.11
4
+ version: 2.9.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Amazon Web Services
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-04-21 00:00:00.000000000 Z
11
+ date: 2017-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-core
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '='
18
18
  - !ruby/object:Gem::Version
19
- version: 2.9.11
19
+ version: 2.9.12
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
- version: 2.9.11
26
+ version: 2.9.12
27
27
  description: Provides resource oriented interfaces and other higher-level abstractions
28
28
  for many AWS services. This gem is part of the official AWS SDK for Ruby.
29
29
  email:
@@ -72,6 +72,7 @@ files:
72
72
  - lib/aws-sdk-resources/services/s3/encryption/kms_cipher_provider.rb
73
73
  - lib/aws-sdk-resources/services/s3/encryption/materials.rb
74
74
  - lib/aws-sdk-resources/services/s3/encryption/utils.rb
75
+ - lib/aws-sdk-resources/services/s3/file_downloader.rb
75
76
  - lib/aws-sdk-resources/services/s3/file_part.rb
76
77
  - lib/aws-sdk-resources/services/s3/file_uploader.rb
77
78
  - lib/aws-sdk-resources/services/s3/multipart_file_uploader.rb