faster_s3 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/faster_s3.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
10
10
  spec.email = ["patrick@redbubble.com"]
11
11
  spec.description = %q{Download files from s3 in parallel}
12
12
  spec.summary = %q{Faster s3 downloads}
13
- spec.homepage = ""
13
+ spec.homepage = "https://github.com/stiak/faster_s3"
14
14
  spec.license = "MIT"
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
@@ -0,0 +1,68 @@
1
+ require 'aws/s3'
2
+ require 'parallel'
3
+
4
module FasterS3
  # Downloads one S3 object as several byte-range parts fetched concurrently
  # (one thread per part), then concatenates the part files into target_path.
  class Download
    # Number of parts used when config[:parallel] is not given.
    DEFAULT_PARTS = 8
    # Bytes copied per read when stitching part files together.
    CHUNK_SIZE = 1024*1024

    attr_reader :target_path, :config, :parts

    # @param [String] target_path File the assembled object is written to;
    #   part files are created next to it.
    # @param [Hash] config Reads :access_key_id, :secret_access_key,
    #   :bucket_name, :path and the optional :parallel part count.
    # @raise [ArgumentError] if :parallel is not an integer value >= 1
    # @raise [TypeError] if :parallel cannot be converted by Integer()
    def initialize(target_path, config = {})
      @target_path = target_path
      @config = config
      # Integer() accepts 8 as well as "8"; a zero or negative count would
      # otherwise surface later as ZeroDivisionError or an empty output file.
      @parts = Integer(config[:parallel] || DEFAULT_PARTS)
      raise ArgumentError, "parallel must be at least 1 (got #{@parts})" if @parts < 1
    end

    # Fetches every part and assembles them into target_path.
    def download
      reconstitute(download_parts)
    end

    # Downloads all parts, one thread per part.
    #
    # @return [Array<String>] part file paths, in part order
    def download_parts
      part_objects = create_parts
      Parallel.map(part_objects, in_threads: part_objects.length) do |file_part|
        file_part.download(s3_object)
        file_part.part_path
      end
    end

    # Splits the remote object's length into `parts` equal slices.
    #
    # @return [Array<Part>] one Part per slice, in order
    # @raise [RuntimeError] if the object has fewer bytes than parts
    def create_parts
      length = s3_object.content_length
      raise "File is too small to download in parallel" if length < parts

      part_length, remainder = length.divmod(parts)
      last_index = parts - 1

      Array.new(parts) do |index|
        # Last part includes any remaining bytes
        extra_bytes = index == last_index ? remainder : 0
        Part.new(target_path, index, part_length, extra_bytes)
      end
    end

    # Concatenates the given part files, in order, into target_path and
    # deletes each part file once it has been copied.
    #
    # @param [Array<String>] file_parts part file paths in assembly order
    def reconstitute(file_parts)
      File.open(target_path, 'wb') do |file|
        file_parts.each do |part_path|
          File.open(part_path, 'rb') do |part_file|
            # Copy in CHUNK_SIZE pieces so a whole part is never held in memory.
            file.write(part_file.read(CHUNK_SIZE)) until part_file.eof?
          end
          File.delete(part_path)
        end
      end
    end

    private

    # Memoized handle for the remote object, built from the credentials,
    # bucket name and key in config.
    def s3_object
      @s3_object ||= begin
        s3 = AWS::S3.new({:access_key_id => config[:access_key_id], :secret_access_key => config[:secret_access_key]})
        bucket = s3.buckets[config[:bucket_name]]
        bucket.objects[config[:path]]
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
module FasterS3
  # One contiguous slice of a remote object, downloaded into its own
  # "<file_path>.part.<index>" file. Downloads append to an existing part
  # file, so an interrupted download resumes from the bytes already on disk.
  class Part
    attr_accessor :file_path, :index, :part_length, :extra_bytes

    # @param [String] file_path Final destination path; used as the prefix
    #   of the part file name.
    # @param [Integer] index Zero-based position of this part.
    # @param [Integer] part_length Bytes in every part (before extra_bytes).
    # @param [Integer] extra_bytes Additional bytes appended to this part.
    def initialize(file_path, index, part_length, extra_bytes)
      self.file_path = file_path
      self.index = index
      self.part_length = part_length
      self.extra_bytes = extra_bytes
    end

    # @return [String] path of the file holding this part's bytes
    def part_path
      "#{file_path}.part.#{index}"
    end

    # Bytes of the remote object still to be fetched for this part: the
    # part's slice, minus whatever is already in the part file.
    #
    # @return [Range] end-exclusive byte offsets into the remote object
    def byte_range
      start = index * part_length
      end_pos = start + part_length + extra_bytes

      (start + existing_size)...end_pos
    end

    # @return [Integer] size of the part file on disk, 0 when it is absent
    def existing_size
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      File.exist?(part_path) ? File.size(part_path) : 0
    end

    # Appends the outstanding bytes of this part to the part file.
    #
    # @param s3_object Object responding to read(range: Range) that yields
    #   data chunks to a block.
    def download(s3_object)
      discard_oversized_part

      range = byte_range
      # Part already complete: nothing to request. An empty range would
      # presumably also translate to an invalid HTTP Range (last byte before
      # first) - TODO confirm against the S3 client in use.
      return if range.first >= range.last

      File.open(part_path, 'ab') do |file|
        s3_object.read(range: range) do |chunk|
          file.write(chunk)
        end
      end
    end

    private

    # A part file larger than this part can only be a leftover from some
    # other download; appending to it or reusing it would corrupt the
    # result, so remove it and start the part from scratch.
    def discard_oversized_part
      File.delete(part_path) if existing_size > part_length + extra_bytes
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module FasterS3
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
data/lib/faster_s3.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require "faster_s3/version"
2
- require 'aws/s3'
3
- require 'parallel'
2
+ require "faster_s3/part"
3
+ require "faster_s3/download"
4
4
 
5
5
  module FasterS3
6
6
  # @param [String] destination File will be written to this destination
@@ -17,69 +17,7 @@ module FasterS3
17
17
  # @option options [Integer] :parallel Optional - number of parallel parts to download. Default is 8
18
18
  #
19
19
  def self.download(destination, options)
20
- Download.new(options).download_to(destination)
21
- end
22
-
23
- class Download
24
- DEFAULT_PARTS = 8
25
- CHUNK_SIZE = 1024*1024
26
-
27
- attr_reader :config, :parts
28
-
29
- def initialize(config = {})
30
- @config = config
31
- @parts = config[:parallel] || DEFAULT_PARTS
32
- end
33
-
34
- def download_to(path)
35
- s3_object = remote_object
36
- length = s3_object.content_length
37
- raise "File is too small to download in parallel" if length < parts
38
-
39
- # Split into parts
40
- part_length = length / parts
41
- remainder = length % parts
42
-
43
- part_array = Array(1..parts)
44
- file_parts = Parallel.map(part_array, in_threads: parts) do |part|
45
- start = (part - 1) * part_length
46
- end_pos = start + part_length
47
-
48
- if part == part_array.last
49
- end_pos += remainder
50
- byte_range = start..end_pos
51
- else
52
- byte_range = start...end_pos
53
- end
54
-
55
- part_path = "#{path}.part.#{part}"
56
- File.open("#{path}.part.#{part}", 'wb') do |file|
57
- s3_object.read(range: byte_range) do |chunk|
58
- file.write(chunk)
59
- end
60
- end
61
- part_path
62
- end
63
-
64
- reconstitute_parts(path, file_parts)
65
- end
66
-
67
- def reconstitute_parts(path, file_parts)
68
- File.open(path, 'wb') do |file|
69
- file_parts.each do |part_path|
70
- File.open(part_path, 'rb') do |part_file|
71
- file.write(part_file.read(CHUNK_SIZE)) until part_file.eof?
72
- end
73
- File.delete(part_path)
74
- end
75
- end
76
- end
77
-
78
- def remote_object
79
- s3 = AWS::S3.new({:access_key_id => config[:access_key_id], :secret_access_key => config[:secret_access_key]})
80
- bucket = s3.buckets[config[:bucket_name]]
81
- bucket.objects[config[:path]]
82
- end
20
+ Download.new(destination, options).download
83
21
  end
84
22
 
85
23
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: faster_s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-11-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
16
- requirement: &70297353186560 !ruby/object:Gem::Requirement
16
+ requirement: &70342546745840 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70297353186560
24
+ version_requirements: *70342546745840
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rake
27
- requirement: &70297353185680 !ruby/object:Gem::Requirement
27
+ requirement: &70342546745360 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70297353185680
35
+ version_requirements: *70342546745360
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: aws-sdk
38
- requirement: &70297353184260 !ruby/object:Gem::Requirement
38
+ requirement: &70342546744860 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70297353184260
46
+ version_requirements: *70342546744860
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: parallel
49
- requirement: &70297353183640 !ruby/object:Gem::Requirement
49
+ requirement: &70342546744400 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70297353183640
57
+ version_requirements: *70342546744400
58
58
  description: Download files from s3 in parallel
59
59
  email:
60
60
  - patrick@redbubble.com
@@ -71,8 +71,10 @@ files:
71
71
  - Rakefile
72
72
  - faster_s3.gemspec
73
73
  - lib/faster_s3.rb
74
+ - lib/faster_s3/download.rb
75
+ - lib/faster_s3/part.rb
74
76
  - lib/faster_s3/version.rb
75
- homepage: ''
77
+ homepage: https://github.com/stiak/faster_s3
76
78
  licenses:
77
79
  - MIT
78
80
  post_install_message: