s3_uploader 0.1.0 → 0.1.1

checksums.yaml CHANGED
@@ -1,15 +1,7 @@
  ---
- !binary "U0hBMQ==":
-   metadata.gz: !binary |-
-     MDZiOTVmMWI5NTExMmQ2NzEyMTA1MTliYTVhNGJhMDA1MWJmYjEwZA==
-   data.tar.gz: !binary |-
-     ZDcyZTAwNjAwOGFlNzJjYzk0YjU3MGJmYTBiNTM2MmViZDVkYTkwNQ==
+ SHA1:
+   metadata.gz: 1a8fb8f3417ec607cb24ed10345b5592ddeecde7
+   data.tar.gz: 7f749a8deb7fe99d35c4365f264b2bb57206807c
  SHA512:
-   metadata.gz: !binary |-
-     YjYxNGVjODAwNjY2Y2E4YzU5NmY4NWEzNDFlNjg2NmI5NmJjYzhkYmNhMTg0
-     NWNjOGM0MzQ2Yzk4NWRiYWY5N2NkYjUyY2M2ZGUzMGZkYTYyMzEyYmFlOGMw
-     ZTI1NmQ1ZjFmYTFkN2MyNDViMWM4ZTFiMzFkNzEwYzAxNTBhZGY=
-   data.tar.gz: !binary |-
-     MGQ4NjEyMDA3N2M4OWY0NmIwMTI1MjFhY2UzY2NlY2VlZjNhM2NkNDNlZTQz
-     NmVlNTk0MzNjYjBiOWUzYjBlYTA3ZDBmMzVhZDJiMjlkN2NjYzE5Y2ZjZWFl
-     ZjgxYjA1OGVhZmU0MjFjODc3ZTE2ZDAyMmIxYjU4MzY1ZTIzMTg=
+   metadata.gz: aaaaf8f47a0548a371c867c0de00f639ec0fad740844892d4ba7d0aac4afb3679cb7fcc43737a155c72882e7212d242dfa8595d1cdd03737a705efe1d1c63b9b
+   data.tar.gz: 13a457b7691ad2a21ec201a5a64f80a36950edace655852122dc21d55e8c6128c00ec3eb0dabca051c013649b0bc1b4a4fa1d4306470894d19519a4b8cffb2f9
data/.rspec ADDED
@@ -0,0 +1,2 @@
+ # Skip all tests tagged with "slow"
+ --tag ~slow
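(Examples tagged `:slow`, such as the new large-file upload spec further down, are excluded by default; they can still be run on demand with `rspec --tag slow`.)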
data/README.md CHANGED
@@ -29,18 +29,24 @@ Or install it yourself as:
 
  ## Usage
 
+ ```ruby
  S3Uploader.upload_directory('/tmp/test', 'mybucket',
- { :s3_key => YOUR_KEY,
- :s3_secret => YOUR_SECRET_KEY,
- :destination_dir => 'test/',
- :region => 'eu-west-1',
- :threads => 4,
- :metadata => { 'Cache-Control' => 'max-age=315576000' }
+ { :s3_key => YOUR_KEY,
+ :s3_secret => YOUR_SECRET_KEY,
+ :destination_dir => 'test/',
+ :region => 'eu-west-1',
+ :threads => 4,
+ :metadata => { 'Cache-Control' => 'max-age=315576000' }
  })
+ ```
 
  If no keys are provided, it uses S3_KEY and S3_SECRET environment variables. us-east-1 is the default region.
 
+ ```ruby
  S3Uploader.upload_directory('/tmp/test', 'mybucket', { :destination_dir => 'test/', :threads => 4 })
+ ```
+
+ Metadata headers are documented [here](http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html)
 
  Or as a command line binary
 
@@ -49,14 +55,26 @@ Or as a command line binary
  Again, it uses S3_KEY and S3_SECRET environment variables if none provided in parameters.
 
  s3uploader -d test/ -t 4 /tmp/test mybucket
-
-
- Metadata headers are documented [here](http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html)
+
+ ## Compress files
+
+ If the `:gzip` option is used, files not already compressed are packed using GZip before upload. A GZip working
+ directory is required in this case.
+
+ ```ruby
+ S3Uploader.upload_directory('/tmp/test', 'mybucket',
+ { :s3_key => YOUR_KEY,
+ :s3_secret => YOUR_SECRET_KEY,
+ :destination_dir => 'test/',
+ :region => 'eu-west-1',
+ :gzip => true,
+ :gzip_working_dir => '/tmp/gzip_working_dir'
+ })
+ ```
 
  ## TODO
 
- 1. Allow regex pattern matching to select files to upload
- 2. Add optional time, size and number of files uploaded report at end of process
+ 1. Add optional time, size and number of files uploaded report at end of process
 
  ## Contributing
 
@@ -1,5 +1,7 @@
  module S3Uploader
    KILO_SIZE = 1024.0
+   BLOCK_SIZE = 1024 * 1024
+
    def self.upload_directory(source, bucket, options = {})
      options = {
        :destination_dir => '',
@@ -12,21 +14,36 @@ module S3Uploader
        :path_style => false,
        :regexp => /.*/,
        :gzip => false,
-       :gzip_working_dir => source,
+       :gzip_working_dir => nil,
        :time_range => Time.at(0)..(Time.now + (60 * 60 * 24))
      }.merge(options)
 
      log = options[:logger] || Logger.new(STDOUT)
 
      raise 'Source must be a directory' unless File.directory?(source)
-     if options[:gzip_working_dir] != source && options[:gzip_working_dir][source]
-       raise 'gzip_working_dir may not be located within source-folder'
+
+
+     if options[:gzip]
+       if options[:gzip_working_dir].nil?
+         raise 'gzip_working_dir required when using gzip'
+       else
+         source_dir = source.end_with?('/') ? source : [ source, '/'].join
+         gzip_working_dir = options[:gzip_working_dir].end_with?('/') ?
+           options[:gzip_working_dir] : [ options[:gzip_working_dir], '/'].join
+
+         if gzip_working_dir.start_with?(source_dir)
+           raise 'gzip_working_dir may not be located within source-folder'
+         end
+       end
+
+       options[:gzip_working_dir] = options[:gzip_working_dir].chop if options[:gzip_working_dir].end_with?('/')
      end
 
+
      if options[:connection]
        connection = options[:connection]
      else
-       raise "Missing access keys" if options[:s3_key].nil? or options[:s3_secret].nil?
+       raise "Missing access keys" if options[:s3_key].nil? || options[:s3_secret].nil?
 
        connection = Fog::Storage.new({
          :provider => 'AWS',
@@ -38,15 +55,15 @@ module S3Uploader
      end
 
      source = source.chop if source.end_with?('/')
-     options[:gzip_working_dir] = options[:gzip_working_dir].chop if options[:gzip_working_dir].end_with?('/')
-     if options[:destination_dir] != '' and !options[:destination_dir].end_with?('/')
+
+     if options[:destination_dir] != '' && !options[:destination_dir].end_with?('/')
        options[:destination_dir] = "#{options[:destination_dir]}/"
      end
      total_size = 0
      files = Queue.new
 
      Dir.glob("#{source}/**/*").select { |f| !File.directory?(f) }.each do |f|
-       if File.basename(f).match(options[:regexp]) and options[:time_range].cover?(File.mtime(f))
+       if File.basename(f).match(options[:regexp]) && options[:time_range].cover?(File.mtime(f))
          if options[:gzip] && File.extname(f) != '.gz'
            dir, base = File.split(f)
            dir = dir.sub(source, options[:gzip_working_dir])
@@ -56,8 +73,14 @@ module S3Uploader
            Zlib::GzipWriter.open(gz_file) do |gz|
              gz.mtime = File.mtime(f)
              gz.orig_name = f
-             gz.write IO.binread(f)
+
+             File.open(f, 'rb') do |fi|
+               while (block_in = fi.read(BLOCK_SIZE)) do
+                 gz.write block_in
+               end
+             end
            end
+
            files << gz_file
            total_size += File.size(gz_file)
          else
@@ -76,7 +99,7 @@ module S3Uploader
 
      threads = []
      options[:threads].times do |i|
-       threads[i] = Thread.new {
+       threads[i] = Thread.new do
 
          until files.empty?
            @mutex.synchronize do
@@ -85,7 +108,7 @@ module S3Uploader
            end
            file = files.pop rescue nil
            if file
-             key = file.gsub(source, '').gsub(options[:gzip_working_dir], '')[1..-1]
+             key = file.gsub(source, '').gsub(options[:gzip_working_dir].to_s, '')[1..-1]
              dest = "#{options[:destination_dir]}#{key}"
              log.info("[#{Thread.current["file_number"]}/#{total_files}] Uploading #{key} to s3://#{bucket}/#{dest}")
 
@@ -97,7 +120,7 @@ module S3Uploader
              )
            end
          end
-       }
+       end
      end
      threads.each { |t| t.join }
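The change above swaps the single `IO.binread` call for block-wise reads, so large files are gzipped without being held in memory all at once. In isolation the pattern looks roughly like this (a standalone sketch; the helper name and file paths are illustrative, not part of the gem):

```ruby
require 'zlib'

BLOCK_SIZE = 1024 * 1024 # 1 MB, matching the new constant in S3Uploader

# Compress src into dst one block at a time instead of slurping the whole file.
def gzip_in_blocks(src, dst)
  Zlib::GzipWriter.open(dst) do |gz|
    gz.mtime     = File.mtime(src)
    gz.orig_name = src
    File.open(src, 'rb') do |io|
      while (block = io.read(BLOCK_SIZE))
        gz.write(block)
      end
    end
  end
end

gzip_in_blocks('/tmp/test/big.log', '/tmp/gzip_working_dir/big.log.gz')
```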
 
@@ -1,3 +1,3 @@
  module S3Uploader
-   VERSION = "0.1.0"
+   VERSION = "0.1.1"
  end
@@ -15,15 +15,31 @@ describe S3Uploader do
      Logger.new(STDOUT)
    end
 
-   before(:each) do
+   let(:connection) do
      Fog.mock!
 
-     FileUtils.rm_rf(Dir.glob("#{tmp_directory}/*"))
+     connection = Fog::Storage.new({
+       :provider => 'AWS',
+       :aws_access_key_id => '11111111111',
+       :aws_secret_access_key => 'XXXXXXXXXXXXXXXXXXXXXXXXXXX'
+     })
+
+   end
+
+   before(:each) do
+     Fog::Mock.reset
+
+     connection.directories.create(
+       :key => 'mybucket',
+       :public => true
+     )
+
+     FileUtils.rm_rf(Dir.glob(File.join(tmp_directory, '*')))
 
      (access + error).each do |file|
-       directory, basename = File.split("#{tmp_directory}/#{file}")
+       directory, basename = File.split(File.join(tmp_directory, file))
        FileUtils.mkdir_p directory
-       Open3.popen3("dd if=/dev/zero of=#{directory}/#{basename} count=1024 bs=1024")
+       create_test_file(File.join(directory, basename), 1)
      end
    end
 
@@ -43,75 +59,49 @@ describe S3Uploader do
    end
 
    it 'should upload all files in a directory' do
-     connection = double(:connection)
-     connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
-     directory.stub(:files).and_return(files = double(:files))
-
-     files.should_receive(:create).exactly(12).times
+     connection.directories.get('mybucket', prefix: 'test1/').files.empty?.should be_true
 
      S3Uploader.upload_directory(tmp_directory, 'mybucket',
        { destination_dir: 'test1/',
          logger: logger,
          connection: connection })
+
+     files = connection.directories.get('mybucket', prefix: 'test1/').files
+     expect(files).to have((access + error).size).items
+     expect(files.map(&:key)).to match_array((access + error).map { |f| File.join('test1/', f) })
    end
 
    describe 'regexp' do
 
      it 'should upload specific files' do
-       connection = double(:connection)
-       connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
-       directory.stub(:files).and_return(files = double(:files))
-
-       keys = access.dup
-       files.should_receive(:create).exactly(6).times do |hash|
-         expect(keys).to include(hash[:key])
-         keys.delete(hash[:key])
-       end
 
        S3Uploader.upload_directory(tmp_directory, 'mybucket',
          { logger: logger,
            connection: connection,
            regexp: /access/ })
+
+       files = connection.directories.get('mybucket').files
+       expect(files).to have(access.size).items
+       expect(files.map(&:key)).to match_array(access)
      end
 
    end
 
    describe 'gzip' do
 
-     it 'should upload compressed files' do
-       connection = double(:connection)
-       connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
-       directory.stub(:files).and_return(files = double(:files))
-
-       #expect to upload gz-files only
-       keys = error.map { |f| f.sub('.gz', '') }.map { |f| f + '.gz' }
-       files.should_receive(:create).exactly(6).times do |hash|
-         expect(keys).to include(hash[:key])
-         keys.delete(hash[:key])
-       end
-
-       S3Uploader.upload_directory(tmp_directory, 'mybucket',
-         { logger: logger,
-           connection: connection,
-           regexp: /error/,
-           gzip: true })
+     it "should require a gzip working directory" do
+       lambda {
+         S3Uploader.upload_directory('/tmp', 'mybucket',
+           { logger: logger,
+             connection: connection,
+             gzip: true })
+       }.should raise_error('gzip_working_dir required when using gzip')
      end
 
-     it 'should use gzip_working_dir correctly' do
+     it 'should compress files before upload when needed' do
        working_dir = File.join(Dir.tmpdir, 's3uploader_spec/working_dir')
        FileUtils.mkdir_p working_dir
-       FileUtils.rm_rf(Dir.glob("#{working_dir}/*"))
-
-       connection = double(:connection)
-       connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
-       directory.stub(:files).and_return(files = double(:files))
-
-       #expect to upload gz-files only
-       keys = error.map { |f| f.sub('.gz', '') }.map { |f| f + '.gz' }
-       files.should_receive(:create).exactly(6).times do |hash|
-         expect(keys).to include(hash[:key])
-         keys.delete(hash[:key])
-       end
+       FileUtils.rm_rf(Dir.glob(File.join(working_dir, '*')))
 
        S3Uploader.upload_directory(tmp_directory, 'mybucket',
          { logger: logger,
@@ -120,70 +110,83 @@ describe S3Uploader do
            gzip: true,
            gzip_working_dir: working_dir })
 
-       #only compress files which aren't compressed yet
-       compressed_files = error.select { |f| File.extname(f) != '.gz' }.map { |f| f + '.gz' }
-       working_dir_content = Dir["#{working_dir}/**/*"].map { |f| f.sub(working_dir, '')[1..-1] }
-
-       #expect compressed files within working_directory
-       expect(working_dir_content & compressed_files).to match_array(compressed_files)
+       files = connection.directories.get('mybucket').files
+       expect(files).to have(error.size).items
+       expect(files.map(&:key)).to match_array(error.map { |f| File.extname(f) != '.gz' ? [f, '.gz'].join : f })
      end
 
      it 'when called with bad gzip_working_dir it should raise an exception' do
        expect {
          S3Uploader.upload_directory(tmp_directory, 'mybucket',
            { gzip: true,
-             gzip_working_dir: File.join(Dir.tmpdir, 'test_s3_uploader/working_dir') })
+             gzip_working_dir: File.join(tmp_directory, 'working_dir') })
        }.to raise_error('gzip_working_dir may not be located within source-folder')
 
        expect {
          S3Uploader.upload_directory(tmp_directory, 'mybucket',
-           { gzip: true,
+           { logger: logger,
+             connection: connection,
+             regexp: /non_matching/,
+             gzip: true,
              gzip_working_dir: File.join(Dir.tmpdir, 'test_s3_uploader_working_dir') })
-       }.to raise_error('gzip_working_dir may not be located within source-folder')
+       }.to_not raise_error
+
      end
 
+     # Run with: rspec --tag slow
+     it 'uploads large files', :slow do
+       working_dir = File.join(Dir.tmpdir, 's3uploader_big_file_spec/working_dir')
+       big_file_dir = File.join(Dir.tmpdir, 'test_s3_uploader_big_file')
+       FileUtils.mkdir_p working_dir
+       FileUtils.mkdir_p big_file_dir
+       create_test_file(File.join(big_file_dir, 'test_big_file.dmp'), 2*1024)
+
+       S3Uploader.upload_directory(big_file_dir, 'mybucket',
+         { logger: logger,
+           connection: connection,
+           gzip: true,
+           gzip_working_dir: working_dir })
+
+       files = connection.directories.get('mybucket').files
+       expect(files.map(&:key)).to match_array([ 'test_big_file.dmp.gz' ])
+
+       FileUtils.rm_rf(Dir.glob(File.join(working_dir, '*')))
+       FileUtils.rm_rf(Dir.glob(File.join(big_file_dir, '*')))
+     end
    end
 
    describe 'time_range' do
 
      it 'should not upload any files' do
-       connection = double(:connection)
-       connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
-       directory.stub(:files).and_return(files = double(:files))
-
-       file_names = access.map { |f| "#{tmp_directory}/#{f}" }
+       file_names = access.map { |f| File.join( tmp_directory, f) }
        yesterday = Time.now - (60 * 60 * 24)
        File.utime(yesterday, yesterday, *file_names)
 
-       files.should_not_receive(:create)
-
        S3Uploader.upload_directory(tmp_directory, 'mybucket',
          { logger: logger,
            connection: connection,
            regexp: /access/,
            time_range: (Time.now - (60 * 60 * 12))..Time.now })
+
+       files = connection.directories.get('mybucket').files
+       expect(files).to have(0).items
      end
 
      it 'should upload files' do
-       connection = double(:connection)
-       connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
-       directory.stub(:files).and_return(files = double(:files))
-
-       file_names = access.map { |f| "#{tmp_directory}/#{f}" }
+       file_names = access.map { |f| File.join( tmp_directory, f) }
        yesterday = Time.now - (60 * 60 * 12)
        File.utime(yesterday, yesterday, *file_names)
 
-       keys = access.dup
-       files.should_receive(:create).exactly(6).times do |hash|
-         expect(keys).to include(hash[:key])
-         keys.delete(hash[:key])
-       end
 
        S3Uploader.upload_directory(tmp_directory, 'mybucket',
          { logger: logger,
            connection: connection,
            regexp: /access/,
            time_range: (Time.now - (60 * 60 * 24))..Time.now })
+
+       files = connection.directories.get('mybucket').files
+       expect(files).to have(access.size).items
+       expect(files.map(&:key)).to match_array(access)
      end
 
    end
  end
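The rewritten specs drop the RSpec doubles (`double(:connection)`, `stub_chain`, `should_receive`) and instead run against Fog's in-memory mock backend, asserting on the bucket contents after the upload. Reduced to a minimal standalone sketch (bucket and object names here are only illustrative):

```ruby
require 'fog'

Fog.mock!        # route all Fog calls to the in-memory mock backend
Fog::Mock.reset  # clear mock state, as the specs now do before each example

storage = Fog::Storage.new(
  :provider              => 'AWS',
  :aws_access_key_id     => 'fake',
  :aws_secret_access_key => 'fake'
)

storage.directories.create(:key => 'mybucket', :public => true)
storage.directories.get('mybucket').files.create(:key => 'hello.txt', :body => 'hi')

# Assertions can then inspect what was "uploaded":
storage.directories.get('mybucket').files.map(&:key) # => ["hello.txt"]
```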
data/spec/spec_helper.rb CHANGED
@@ -7,4 +7,13 @@ require 'open3'
  RSpec.configure do |config|
    config.color_enabled = true
    config.formatter = 'documentation'
+   config.treat_symbols_as_metadata_keys_with_true_values = true
+ end
+
+
+ def create_test_file(filename, size)
+   File.open(filename, 'w') do |f|
+     contents = "x" * (1024*1024)
+     size.to_i.times { f.write(contents) }
+   end
  end
metadata CHANGED
@@ -1,55 +1,55 @@
  --- !ruby/object:Gem::Specification
  name: s3_uploader
  version: !ruby/object:Gem::Version
-   version: 0.1.0
+   version: 0.1.1
  platform: ruby
  authors:
  - Christian Hein
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-04-15 00:00:00.000000000 Z
+ date: 2014-08-15 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: fog
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ! '>='
+     - - ">="
        - !ruby/object:Gem::Version
          version: '0'
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ! '>='
+     - - ">="
        - !ruby/object:Gem::Version
          version: '0'
  - !ruby/object:Gem::Dependency
    name: rspec
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - "~>"
        - !ruby/object:Gem::Version
          version: 2.14.1
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ~>
+     - - "~>"
        - !ruby/object:Gem::Version
          version: 2.14.1
  - !ruby/object:Gem::Dependency
    name: rake
    requirement: !ruby/object:Gem::Requirement
      requirements:
-     - - ! '>='
+     - - ">="
        - !ruby/object:Gem::Version
          version: '0'
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
-     - - ! '>='
+     - - ">="
        - !ruby/object:Gem::Version
          version: '0'
  description: S3 multithreaded directory uploader
@@ -60,7 +60,8 @@ executables:
  extensions: []
  extra_rdoc_files: []
  files:
- - .gitignore
+ - ".gitignore"
+ - ".rspec"
  - Gemfile
  - LICENSE
  - README.md
@@ -82,17 +83,17 @@ require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ! '>='
+   - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
    requirements:
-   - - ! '>='
+   - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.1.2
+ rubygems_version: 2.2.2
  signing_key:
  specification_version: 4
  summary: S3 multithreaded directory uploader