faster_s3 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/faster_s3.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
10
10
  spec.email = ["patrick@redbubble.com"]
11
11
  spec.description = %q{Download files from s3 in parallel}
12
12
  spec.summary = %q{Faster s3 downloads}
13
- spec.homepage = ""
13
+ spec.homepage = "https://github.com/stiak/faster_s3"
14
14
  spec.license = "MIT"
15
15
 
16
16
  spec.files = `git ls-files`.split($/)
@@ -0,0 +1,68 @@
1
+ require 'aws/s3'
2
+ require 'parallel'
3
+
4
module FasterS3
  # Downloads one S3 object by splitting it into byte ranges, fetching every
  # range in its own thread into a "<target_path>.part.<index>" file, and then
  # concatenating those part files into +target_path+.
  class Download
    # Number of parts used when the config does not supply :parallel.
    DEFAULT_PARTS = 8
    # No longer used by #reconstitute (IO.copy_stream handles buffering);
    # kept so existing references to the constant keep resolving.
    CHUNK_SIZE = 1024*1024

    attr_reader :target_path, :config, :parts

    # @param target_path [String] path the assembled file is written to
    # @param config [Hash] settings; this class reads :access_key_id,
    #   :secret_access_key, :bucket_name, :path and the optional :parallel
    # @raise [ArgumentError] if :parallel is not a positive whole number
    def initialize(target_path, config = {})
      @target_path = target_path
      @config = config
      # Integer() accepts 4 as well as "4" and rejects junk such as "four",
      # so a bad value fails here instead of deep inside the split arithmetic.
      @parts = Integer(config[:parallel] || DEFAULT_PARTS)
      raise ArgumentError, "parallel must be a positive integer" unless @parts > 0
    end

    # Fetches all parts and assembles them into +target_path+.
    def download
      reconstitute(download_parts)
    end

    # Downloads every part using one thread per part.
    #
    # @return [Array<String>] part file paths, in part order
    def download_parts
      part_objects = create_parts
      Parallel.map(part_objects, in_threads: part_objects.length) do |file_part|
        file_part.download(s3_object)
        file_part.part_path
      end
    end

    # Splits the remote object's length into +parts+ equally sized parts.
    #
    # @return [Array<Part>] one Part per index from 0 to parts - 1
    # @raise [RuntimeError] if the object has fewer bytes than there are parts
    def create_parts
      length = s3_object.content_length
      raise "File is too small to download in parallel" if length < parts

      part_length, remainder = length.divmod(parts)
      last_index = parts - 1

      Array.new(parts) do |index|
        # Only the last part carries the bytes left over by the division.
        extra_bytes = index == last_index ? remainder : 0
        Part.new(target_path, index, part_length, extra_bytes)
      end
    end

    # Concatenates the part files, in the order given, into +target_path+,
    # deleting each part file once it has been copied.
    #
    # @param file_parts [Array<String>] part file paths in assembly order
    def reconstitute(file_parts)
      File.open(target_path, 'wb') do |file|
        file_parts.each do |part_path|
          File.open(part_path, 'rb') do |part_file|
            IO.copy_stream(part_file, file)
          end
          File.delete(part_path)
        end
      end
    end

    private

    # Memoized handle for the remote object named by config[:bucket_name]
    # and config[:path].
    def s3_object
      @s3_object ||= begin
        s3 = AWS::S3.new({:access_key_id => config[:access_key_id], :secret_access_key => config[:secret_access_key]})
        bucket = s3.buckets[config[:bucket_name]]
        bucket.objects[config[:path]]
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
module FasterS3
  # One contiguous slice of a remote object, stored locally in
  # "<file_path>.part.<index>". A part file left by an earlier run is
  # appended to rather than downloaded again from the start.
  class Part
    attr_accessor :file_path, :index, :part_length, :extra_bytes

    # @param file_path [String] path of the final assembled file
    # @param index [Integer] zero-based position of this part
    # @param part_length [Integer] base size in bytes shared by the parts
    # @param extra_bytes [Integer] additional bytes appended to this part
    def initialize(file_path, index, part_length, extra_bytes)
      self.file_path = file_path
      self.index = index
      self.part_length = part_length
      self.extra_bytes = extra_bytes
    end

    # @return [String] path of the local file holding this part
    def part_path
      "#{file_path}.part.#{index}"
    end

    # Bytes of the remote object that still have to be fetched: the part's
    # full span with the bytes already on disk skipped.
    #
    # @return [Range] end-exclusive range of remote byte offsets
    def byte_range
      start = index * part_length
      end_pos = start + part_length + extra_bytes

      (start + existing_size)...end_pos
    end

    # @return [Integer] size of the part file on disk, 0 when it is absent
    def existing_size
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      File.exist?(part_path) ? File.size(part_path) : 0
    end

    # Appends the missing bytes of this part to the part file.
    #
    # @param s3_object [#read] object whose read(range:) yields data chunks
    def download(s3_object)
      # A leftover file larger than this part cannot be a prefix of it;
      # appending to it would corrupt the assembled file, so start over.
      File.delete(part_path) if existing_size > part_length + extra_bytes

      range = byte_range
      File.open(part_path, 'ab') do |file|
        # Part already complete: an empty range is not a valid request.
        next if range.first >= range.last

        s3_object.read(range: range) do |chunk|
          file.write(chunk)
        end
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module FasterS3
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
data/lib/faster_s3.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require "faster_s3/version"
2
- require 'aws/s3'
3
- require 'parallel'
2
+ require "faster_s3/part"
3
+ require "faster_s3/download"
4
4
 
5
5
  module FasterS3
6
6
  # @param [String] destination File will be written to this destination
@@ -17,69 +17,7 @@ module FasterS3
17
17
  # @option options [Integer] :parallel Optional - number of parallel parts to download. Default is 8
18
18
  #
19
19
  def self.download(destination, options)
20
- Download.new(options).download_to(destination)
21
- end
22
-
23
- class Download
24
- DEFAULT_PARTS = 8
25
- CHUNK_SIZE = 1024*1024
26
-
27
- attr_reader :config, :parts
28
-
29
- def initialize(config = {})
30
- @config = config
31
- @parts = config[:parallel] || DEFAULT_PARTS
32
- end
33
-
34
- def download_to(path)
35
- s3_object = remote_object
36
- length = s3_object.content_length
37
- raise "File is too small to download in parallel" if length < parts
38
-
39
- # Split into parts
40
- part_length = length / parts
41
- remainder = length % parts
42
-
43
- part_array = Array(1..parts)
44
- file_parts = Parallel.map(part_array, in_threads: parts) do |part|
45
- start = (part - 1) * part_length
46
- end_pos = start + part_length
47
-
48
- if part == part_array.last
49
- end_pos += remainder
50
- byte_range = start..end_pos
51
- else
52
- byte_range = start...end_pos
53
- end
54
-
55
- part_path = "#{path}.part.#{part}"
56
- File.open("#{path}.part.#{part}", 'wb') do |file|
57
- s3_object.read(range: byte_range) do |chunk|
58
- file.write(chunk)
59
- end
60
- end
61
- part_path
62
- end
63
-
64
- reconstitute_parts(path, file_parts)
65
- end
66
-
67
- def reconstitute_parts(path, file_parts)
68
- File.open(path, 'wb') do |file|
69
- file_parts.each do |part_path|
70
- File.open(part_path, 'rb') do |part_file|
71
- file.write(part_file.read(CHUNK_SIZE)) until part_file.eof?
72
- end
73
- File.delete(part_path)
74
- end
75
- end
76
- end
77
-
78
- def remote_object
79
- s3 = AWS::S3.new({:access_key_id => config[:access_key_id], :secret_access_key => config[:secret_access_key]})
80
- bucket = s3.buckets[config[:bucket_name]]
81
- bucket.objects[config[:path]]
82
- end
20
+ Download.new(destination, options).download
83
21
  end
84
22
 
85
23
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: faster_s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2013-11-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
16
- requirement: &70297353186560 !ruby/object:Gem::Requirement
16
+ requirement: &70342546745840 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '1.3'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70297353186560
24
+ version_requirements: *70342546745840
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rake
27
- requirement: &70297353185680 !ruby/object:Gem::Requirement
27
+ requirement: &70342546745360 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70297353185680
35
+ version_requirements: *70342546745360
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: aws-sdk
38
- requirement: &70297353184260 !ruby/object:Gem::Requirement
38
+ requirement: &70342546744860 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70297353184260
46
+ version_requirements: *70342546744860
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: parallel
49
- requirement: &70297353183640 !ruby/object:Gem::Requirement
49
+ requirement: &70342546744400 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70297353183640
57
+ version_requirements: *70342546744400
58
58
  description: Download files from s3 in parallel
59
59
  email:
60
60
  - patrick@redbubble.com
@@ -71,8 +71,10 @@ files:
71
71
  - Rakefile
72
72
  - faster_s3.gemspec
73
73
  - lib/faster_s3.rb
74
+ - lib/faster_s3/download.rb
75
+ - lib/faster_s3/part.rb
74
76
  - lib/faster_s3/version.rb
75
- homepage: ''
77
+ homepage: https://github.com/stiak/faster_s3
76
78
  licenses:
77
79
  - MIT
78
80
  post_install_message: