faster_s3 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/faster_s3.gemspec +1 -1
- data/lib/faster_s3/download.rb +68 -0
- data/lib/faster_s3/part.rb +33 -0
- data/lib/faster_s3/version.rb +1 -1
- data/lib/faster_s3.rb +3 -65
- metadata +12 -10
data/faster_s3.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ["patrick@redbubble.com"]
|
11
11
|
spec.description = %q{Download files from s3 in parallel}
|
12
12
|
spec.summary = %q{Faster s3 downloads}
|
13
|
-
spec.homepage = ""
|
13
|
+
spec.homepage = "https://github.com/stiak/faster_s3"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
16
16
|
spec.files = `git ls-files`.split($/)
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'aws/s3'
|
2
|
+
require 'parallel'
|
3
|
+
|
4
|
+
module FasterS3
  # Downloads one S3 object as several byte-range parts fetched in parallel
  # threads, then concatenates the part files into +target_path+.
  #
  # Keys read from +config+: :access_key_id, :secret_access_key,
  # :bucket_name, :path and the optional :parallel.
  class Download
    # Number of parts used when config[:parallel] is not given.
    DEFAULT_PARTS = 8
    # Read size, in bytes, used while copying part files into the target.
    CHUNK_SIZE = 1024*1024

    attr_reader :target_path, :config, :parts

    # @param target_path [String] path the reassembled file is written to
    # @param config [Hash] connection settings plus the optional :parallel count
    # @raise [ArgumentError] if :parallel is given but is not a positive integer
    def initialize(target_path, config = {})
      @target_path = target_path
      @config = config
      @parts = part_count(config[:parallel])
    end

    # Fetches every part, then stitches them together at +target_path+.
    def download
      reconstitute(download_parts)
    end

    # Downloads all parts, one thread per part.
    #
    # @return [Array<String>] part file paths, in part order
    def download_parts
      part_objects = create_parts
      Parallel.map(part_objects, in_threads: part_objects.length) do |file_part|
        file_part.download(s3_object)
        file_part.part_path
      end
    end

    # Splits the remote object's length into +parts+ equal slices.
    #
    # @return [Array<FasterS3::Part>] one Part per slice
    # @raise [RuntimeError] if the object has fewer bytes than there are parts
    def create_parts
      length = s3_object.content_length
      raise "File is too small to download in parallel" if length < parts

      part_length, remainder = length.divmod(parts)
      last_index = parts - 1

      Array.new(parts) do |index|
        # Only the last part carries the bytes left over by the integer division.
        extra_bytes = index == last_index ? remainder : 0
        Part.new(target_path, index, part_length, extra_bytes)
      end
    end

    # Concatenates the part files, in the order given, into +target_path+.
    # Each part file is deleted once it has been copied.
    #
    # @param file_parts [Array<String>] part file paths in final byte order
    def reconstitute(file_parts)
      File.open(target_path, 'wb') do |file|
        file_parts.each do |part_path|
          File.open(part_path, 'rb') do |part_file|
            # Copy in CHUNK_SIZE pieces so a whole part is never held in memory.
            file.write(part_file.read(CHUNK_SIZE)) until part_file.eof?
          end
          File.delete(part_path)
        end
      end
    end

    private

    # Resolves the :parallel option into a usable part count.
    #
    # A missing value falls back to DEFAULT_PARTS. Anything else is coerced
    # with Integer() so a numeric string is accepted. Counts below 1 are
    # rejected here because zero would divide by zero in #create_parts and a
    # negative count would produce no parts at all, leaving an empty target.
    def part_count(requested)
      return DEFAULT_PARTS unless requested

      count = Integer(requested)
      unless count > 0
        raise ArgumentError, ":parallel must be a positive integer, got #{requested.inspect}"
      end
      count
    end

    # Memoized handle on the remote object named by config[:bucket_name] and
    # config[:path], so every caller in this instance shares one handle.
    def s3_object
      @s3_object ||= begin
        s3 = AWS::S3.new({:access_key_id => config[:access_key_id], :secret_access_key => config[:secret_access_key]})
        bucket = s3.buckets[config[:bucket_name]]
        bucket.objects[config[:path]]
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Opened through the module so this file also loads when required on its own.
module FasterS3
  # One contiguous byte slice of a remote object, stored in its own
  # "<file_path>.part.<index>" file.
  #
  # The part file is opened in append mode and the requested range starts
  # after the bytes already on disk, so running #download again continues a
  # partially written part instead of starting it over.
  class Part
    attr_accessor :file_path, :index, :part_length, :extra_bytes

    # @param file_path [String] path of the final, reassembled file
    # @param index [Integer] zero-based position of this part
    # @param part_length [Integer] nominal number of bytes per part
    # @param extra_bytes [Integer] additional bytes added to the end of this part's range
    def initialize(file_path, index, part_length, extra_bytes)
      @file_path = file_path
      @index = index
      @part_length = part_length
      @extra_bytes = extra_bytes
    end

    # @return [String] path of the file this part is written to
    def part_path
      "#{file_path}.part.#{index}"
    end

    # Bytes of the remote object still missing from the part file.
    #
    # @return [Range] end-exclusive range; empty once the part file already
    #   holds part_length + extra_bytes bytes (or more)
    def byte_range
      start = index * part_length
      end_pos = start + part_length + extra_bytes

      (start + existing_size)...end_pos
    end

    # @return [Integer] size of the part file on disk, 0 when it does not exist
    def existing_size
      # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
      File.exist?(part_path) ? File.size(part_path) : 0
    end

    # Appends the missing bytes of this part to the part file.
    #
    # @param s3_object [#read] object whose read(range:) yields data chunks
    def download(s3_object)
      File.open(part_path, 'ab') do |file|
        remaining = byte_range
        # A fully downloaded part leaves an empty range, which has no valid
        # HTTP byte-range form (last byte < first byte), so skip the request.
        # NOTE(review): how S3 answers such a request was not verified.
        if remaining.begin < remaining.end
          s3_object.read(range: remaining) do |chunk|
            file.write(chunk)
          end
        end
      end
    end
  end
end
|
data/lib/faster_s3/version.rb
CHANGED
data/lib/faster_s3.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require "faster_s3/version"
|
2
|
-
require
|
3
|
-
require
|
2
|
+
require "faster_s3/part"
|
3
|
+
require "faster_s3/download"
|
4
4
|
|
5
5
|
module FasterS3
|
6
6
|
# @param [String] destination File will be written to this destination
|
@@ -17,69 +17,7 @@ module FasterS3
|
|
17
17
|
# @option options [String] :parallel Optional - number of parallel parts to download. Default is 8
|
18
18
|
#
|
19
19
|
def self.download(destination, options)
|
20
|
-
Download.new(options).
|
21
|
-
end
|
22
|
-
|
23
|
-
class Download
|
24
|
-
DEFAULT_PARTS = 8
|
25
|
-
CHUNK_SIZE = 1024*1024
|
26
|
-
|
27
|
-
attr_reader :config, :parts
|
28
|
-
|
29
|
-
def initialize(config = {})
|
30
|
-
@config = config
|
31
|
-
@parts = config[:parallel] || DEFAULT_PARTS
|
32
|
-
end
|
33
|
-
|
34
|
-
def download_to(path)
|
35
|
-
s3_object = remote_object
|
36
|
-
length = s3_object.content_length
|
37
|
-
raise "File is too small to download in parallel" if length < parts
|
38
|
-
|
39
|
-
# Split into parts
|
40
|
-
part_length = length / parts
|
41
|
-
remainder = length % parts
|
42
|
-
|
43
|
-
part_array = Array(1..parts)
|
44
|
-
file_parts = Parallel.map(part_array, in_threads: parts) do |part|
|
45
|
-
start = (part - 1) * part_length
|
46
|
-
end_pos = start + part_length
|
47
|
-
|
48
|
-
if part == part_array.last
|
49
|
-
end_pos += remainder
|
50
|
-
byte_range = start..end_pos
|
51
|
-
else
|
52
|
-
byte_range = start...end_pos
|
53
|
-
end
|
54
|
-
|
55
|
-
part_path = "#{path}.part.#{part}"
|
56
|
-
File.open("#{path}.part.#{part}", 'wb') do |file|
|
57
|
-
s3_object.read(range: byte_range) do |chunk|
|
58
|
-
file.write(chunk)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
part_path
|
62
|
-
end
|
63
|
-
|
64
|
-
reconstitute_parts(path, file_parts)
|
65
|
-
end
|
66
|
-
|
67
|
-
def reconstitute_parts(path, file_parts)
|
68
|
-
File.open(path, 'wb') do |file|
|
69
|
-
file_parts.each do |part_path|
|
70
|
-
File.open(part_path, 'rb') do |part_file|
|
71
|
-
file.write(part_file.read(CHUNK_SIZE)) until part_file.eof?
|
72
|
-
end
|
73
|
-
File.delete(part_path)
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def remote_object
|
79
|
-
s3 = AWS::S3.new({:access_key_id => config[:access_key_id], :secret_access_key => config[:secret_access_key]})
|
80
|
-
bucket = s3.buckets[config[:bucket_name]]
|
81
|
-
bucket.objects[config[:path]]
|
82
|
-
end
|
20
|
+
Download.new(destination, options).download
|
83
21
|
end
|
84
22
|
|
85
23
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: faster_s3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2013-11-07 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
16
|
-
requirement: &
|
16
|
+
requirement: &70342546745840 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '1.3'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70342546745840
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rake
|
27
|
-
requirement: &
|
27
|
+
requirement: &70342546745360 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70342546745360
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: aws-sdk
|
38
|
-
requirement: &
|
38
|
+
requirement: &70342546744860 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70342546744860
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: parallel
|
49
|
-
requirement: &
|
49
|
+
requirement: &70342546744400 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70342546744400
|
58
58
|
description: Download files from s3 in parallel
|
59
59
|
email:
|
60
60
|
- patrick@redbubble.com
|
@@ -71,8 +71,10 @@ files:
|
|
71
71
|
- Rakefile
|
72
72
|
- faster_s3.gemspec
|
73
73
|
- lib/faster_s3.rb
|
74
|
+
- lib/faster_s3/download.rb
|
75
|
+
- lib/faster_s3/part.rb
|
74
76
|
- lib/faster_s3/version.rb
|
75
|
-
homepage:
|
77
|
+
homepage: https://github.com/stiak/faster_s3
|
76
78
|
licenses:
|
77
79
|
- MIT
|
78
80
|
post_install_message:
|