s3_uploader 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
  ---
- !binary "U0hBMQ==":
- metadata.gz: !binary |-
- MDZiOTVmMWI5NTExMmQ2NzEyMTA1MTliYTVhNGJhMDA1MWJmYjEwZA==
- data.tar.gz: !binary |-
- ZDcyZTAwNjAwOGFlNzJjYzk0YjU3MGJmYTBiNTM2MmViZDVkYTkwNQ==
+ SHA1:
+ metadata.gz: 1a8fb8f3417ec607cb24ed10345b5592ddeecde7
+ data.tar.gz: 7f749a8deb7fe99d35c4365f264b2bb57206807c
  SHA512:
- metadata.gz: !binary |-
- YjYxNGVjODAwNjY2Y2E4YzU5NmY4NWEzNDFlNjg2NmI5NmJjYzhkYmNhMTg0
- NWNjOGM0MzQ2Yzk4NWRiYWY5N2NkYjUyY2M2ZGUzMGZkYTYyMzEyYmFlOGMw
- ZTI1NmQ1ZjFmYTFkN2MyNDViMWM4ZTFiMzFkNzEwYzAxNTBhZGY=
- data.tar.gz: !binary |-
- MGQ4NjEyMDA3N2M4OWY0NmIwMTI1MjFhY2UzY2NlY2VlZjNhM2NkNDNlZTQz
- NmVlNTk0MzNjYjBiOWUzYjBlYTA3ZDBmMzVhZDJiMjlkN2NjYzE5Y2ZjZWFl
- ZjgxYjA1OGVhZmU0MjFjODc3ZTE2ZDAyMmIxYjU4MzY1ZTIzMTg=
+ metadata.gz: aaaaf8f47a0548a371c867c0de00f639ec0fad740844892d4ba7d0aac4afb3679cb7fcc43737a155c72882e7212d242dfa8595d1cdd03737a705efe1d1c63b9b
+ data.tar.gz: 13a457b7691ad2a21ec201a5a64f80a36950edace655852122dc21d55e8c6128c00ec3eb0dabca051c013649b0bc1b4a4fa1d4306470894d19519a4b8cffb2f9
data/.rspec ADDED
@@ -0,0 +1,2 @@
+ # Skip all tests tagged with "slow"
+ --tag ~slow
data/README.md CHANGED
@@ -29,18 +29,24 @@ Or install it yourself as:

  ## Usage

+ ```ruby
  S3Uploader.upload_directory('/tmp/test', 'mybucket',
- { :s3_key => YOUR_KEY,
- :s3_secret => YOUR_SECRET_KEY,
- :destination_dir => 'test/',
- :region => 'eu-west-1',
- :threads => 4,
- :metadata => { 'Cache-Control' => 'max-age=315576000' }
+ { :s3_key => YOUR_KEY,
+ :s3_secret => YOUR_SECRET_KEY,
+ :destination_dir => 'test/',
+ :region => 'eu-west-1',
+ :threads => 4,
+ :metadata => { 'Cache-Control' => 'max-age=315576000' }
  })
+ ```

  If no keys are provided, it uses S3_KEY and S3_SECRET environment variables. us-east-1 is the default region.

+ ```ruby
  S3Uploader.upload_directory('/tmp/test', 'mybucket', { :destination_dir => 'test/', :threads => 4 })
+ ```
+
+ Metadata headers are documented [here](http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html)

  Or as a command line binary

@@ -49,14 +55,26 @@ Or as a command line binary
  Again, it uses S3_KEY and S3_SECRET environment variables if non provided in parameters.

  s3uploader -d test/ -t 4 /tmp/test mybucket
-
-
- Metadata headers are documented [here](http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html)
+
+ ## Compress files
+
+ If the `:gzip` options is used, files not already compressed are packed using GZip before upload. A GZip working
+ directory is required in this case.
+
+ ```ruby
+ S3Uploader.upload_directory('/tmp/test', 'mybucket',
+ { :s3_key => YOUR_KEY,
+ :s3_secret => YOUR_SECRET_KEY,
+ :destination_dir => 'test/',
+ :region => 'eu-west-1',
+ :gzip => true,
+ :gzip_working_dir => '/tmp/gzip_working_dir'
+ })
+ ```

  ## TODO

- 1. Allow regex pattern matching to select files to upload
- 2. Add optional time, size and number of files uploaded report at end of process
+ 1. Add optional time, size and number of files uploaded report at end of process

  ## Contributing

@@ -1,5 +1,7 @@
  module S3Uploader
  KILO_SIZE = 1024.0
+ BLOCK_SIZE = 1024 * 1024
+
  def self.upload_directory(source, bucket, options = {})
  options = {
  :destination_dir => '',
@@ -12,21 +14,36 @@ module S3Uploader
  :path_style => false,
  :regexp => /.*/,
  :gzip => false,
- :gzip_working_dir => source,
+ :gzip_working_dir => nil,
  :time_range => Time.at(0)..(Time.now + (60 * 60 * 24))
  }.merge(options)

  log = options[:logger] || Logger.new(STDOUT)

  raise 'Source must be a directory' unless File.directory?(source)
- if options[:gzip_working_dir] != source && options[:gzip_working_dir][source]
- raise 'gzip_working_dir may not be located within source-folder'
+
+
+ if options[:gzip]
+ if options[:gzip_working_dir].nil?
+ raise 'gzip_working_dir required when using gzip'
+ else
+ source_dir = source.end_with?('/') ? source : [ source, '/'].join
+ gzip_working_dir = options[:gzip_working_dir].end_with?('/') ?
+ options[:gzip_working_dir] : [ options[:gzip_working_dir], '/'].join
+
+ if gzip_working_dir.start_with?(source_dir)
+ raise 'gzip_working_dir may not be located within source-folder'
+ end
+ end
+
+ options[:gzip_working_dir] = options[:gzip_working_dir].chop if options[:gzip_working_dir].end_with?('/')
  end

+
  if options[:connection]
  connection = options[:connection]
  else
- raise "Missing access keys" if options[:s3_key].nil? or options[:s3_secret].nil?
+ raise "Missing access keys" if options[:s3_key].nil? || options[:s3_secret].nil?

  connection = Fog::Storage.new({
  :provider => 'AWS',
@@ -38,15 +55,15 @@ module S3Uploader
  end

  source = source.chop if source.end_with?('/')
- options[:gzip_working_dir] = options[:gzip_working_dir].chop if options[:gzip_working_dir].end_with?('/')
- if options[:destination_dir] != '' and !options[:destination_dir].end_with?('/')
+
+ if options[:destination_dir] != '' && !options[:destination_dir].end_with?('/')
  options[:destination_dir] = "#{options[:destination_dir]}/"
  end
  total_size = 0
  files = Queue.new

  Dir.glob("#{source}/**/*").select { |f| !File.directory?(f) }.each do |f|
- if File.basename(f).match(options[:regexp]) and options[:time_range].cover?(File.mtime(f))
+ if File.basename(f).match(options[:regexp]) && options[:time_range].cover?(File.mtime(f))
  if options[:gzip] && File.extname(f) != '.gz'
  dir, base = File.split(f)
  dir = dir.sub(source, options[:gzip_working_dir])
@@ -56,8 +73,14 @@ module S3Uploader
  Zlib::GzipWriter.open(gz_file) do |gz|
  gz.mtime = File.mtime(f)
  gz.orig_name = f
- gz.write IO.binread(f)
+
+ File.open(f, 'rb') do |fi|
+ while (block_in = fi.read(BLOCK_SIZE)) do
+ gz.write block_in
+ end
+ end
  end
+
  files << gz_file
  total_size += File.size(gz_file)
  else
@@ -76,7 +99,7 @@ module S3Uploader

  threads = []
  options[:threads].times do |i|
- threads[i] = Thread.new {
+ threads[i] = Thread.new do

  until files.empty?
  @mutex.synchronize do
@@ -85,7 +108,7 @@ module S3Uploader
  end
  file = files.pop rescue nil
  if file
- key = file.gsub(source, '').gsub(options[:gzip_working_dir], '')[1..-1]
+ key = file.gsub(source, '').gsub(options[:gzip_working_dir].to_s, '')[1..-1]
  dest = "#{options[:destination_dir]}#{key}"
  log.info("[#{Thread.current["file_number"]}/#{total_files}] Uploading #{key} to s3://#{bucket}/#{dest}")

@@ -97,7 +120,7 @@ module S3Uploader
  )
  end
  end
- }
+ end
  end
  threads.each { |t| t.join }

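The change above replaces the single `IO.binread` call with block-wise reads, so large files are streamed through `Zlib::GzipWriter` in 1 MiB chunks instead of being loaded into memory whole. A minimal standalone sketch of that chunked approach (the method name and paths are illustrative, not part of the gem):

```ruby
require 'zlib'

BLOCK_SIZE = 1024 * 1024 # 1 MiB, matching the constant introduced above

# Compress source_path into gz_path one block at a time, so memory use
# stays roughly constant regardless of the input file size.
def gzip_in_blocks(source_path, gz_path)
  Zlib::GzipWriter.open(gz_path) do |gz|
    gz.mtime     = File.mtime(source_path)
    gz.orig_name = source_path
    File.open(source_path, 'rb') do |input|
      while (block = input.read(BLOCK_SIZE))
        gz.write block
      end
    end
  end
end

# Example (hypothetical paths):
# gzip_in_blocks('/tmp/test/big.log', '/tmp/gzip_working_dir/big.log.gz')
```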
@@ -1,3 +1,3 @@
  module S3Uploader
- VERSION = "0.1.0"
+ VERSION = "0.1.1"
  end
@@ -15,15 +15,31 @@ describe S3Uploader do
  Logger.new(STDOUT)
  end

- before(:each) do
+ let(:connection) do
  Fog.mock!

- FileUtils.rm_rf(Dir.glob("#{tmp_directory}/*"))
+ connection = Fog::Storage.new({
+ :provider => 'AWS',
+ :aws_access_key_id => '11111111111',
+ :aws_secret_access_key => 'XXXXXXXXXXXXXXXXXXXXXXXXXXX'
+ })
+
+ end
+
+ before(:each) do
+ Fog::Mock.reset
+
+ connection.directories.create(
+ :key => 'mybucket',
+ :public => true
+ )
+
+ FileUtils.rm_rf(Dir.glob(File.join(tmp_directory, '*')))

  (access + error).each do |file|
- directory, basename = File.split("#{tmp_directory}/#{file}")
+ directory, basename = File.split(File.join(tmp_directory, file))
  FileUtils.mkdir_p directory
- Open3.popen3("dd if=/dev/zero of=#{directory}/#{basename} count=1024 bs=1024")
+ create_test_file(File.join(directory, basename), 1)
  end
  end

@@ -43,75 +59,49 @@ describe S3Uploader do
  end

  it 'should upload all files in a directory' do
- connection = double(:connection)
- connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
- directory.stub(:files).and_return(files = double(:files))
-
- files.should_receive(:create).exactly(12).times
+ connection.directories.get('mybucket', prefix: 'test1/').files.empty?.should be_true

  S3Uploader.upload_directory(tmp_directory, 'mybucket',
  { destination_dir: 'test1/',
  logger: logger,
  connection: connection })
+
+ files = connection.directories.get('mybucket', prefix: 'test1/').files
+ expect(files).to have((access + error).size).items
+ expect(files.map(&:key)).to match_array((access + error).map { |f| File.join('test1/', f) })
  end

  describe 'regexp' do

  it 'should upload specific files' do
- connection = double(:connection)
- connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
- directory.stub(:files).and_return(files = double(:files))
-
- keys = access.dup
- files.should_receive(:create).exactly(6).times do |hash|
- expect(keys).to include(hash[:key])
- keys.delete(hash[:key])
- end

  S3Uploader.upload_directory(tmp_directory, 'mybucket',
  { logger: logger,
  connection: connection,
  regexp: /access/ })
+
+ files = connection.directories.get('mybucket').files
+ expect(files).to have(access.size).items
+ expect(files.map(&:key)).to match_array(access)
  end

  end

  describe 'gzip' do

- it 'should upload compressed files' do
- connection = double(:connection)
- connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
- directory.stub(:files).and_return(files = double(:files))
-
- #expect to upload gz-files only
- keys = error.map { |f| f.sub('.gz', '') }.map { |f| f + '.gz' }
- files.should_receive(:create).exactly(6).times do |hash|
- expect(keys).to include(hash[:key])
- keys.delete(hash[:key])
- end
-
- S3Uploader.upload_directory(tmp_directory, 'mybucket',
- { logger: logger,
- connection: connection,
- regexp: /error/,
- gzip: true })
+ it "should require a gzip working directory" do
+ lambda {
+ S3Uploader.upload_directory('/tmp', 'mybucket',
+ { logger: logger,
+ connection: connection,
+ gzip: true })
+ }.should raise_error('gzip_working_dir required when using gzip')
  end

- it 'should use gzip_working_dir correctly' do
+ it 'should compress files before upload when needed' do
  working_dir = File.join(Dir.tmpdir, 's3uploader_spec/working_dir')
  FileUtils.mkdir_p working_dir
- FileUtils.rm_rf(Dir.glob("#{working_dir}/*"))
-
- connection = double(:connection)
- connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
- directory.stub(:files).and_return(files = double(:files))
-
- #expect to upload gz-files only
- keys = error.map { |f| f.sub('.gz', '') }.map { |f| f + '.gz' }
- files.should_receive(:create).exactly(6).times do |hash|
- expect(keys).to include(hash[:key])
- keys.delete(hash[:key])
- end
+ FileUtils.rm_rf(Dir.glob(File.join(working_dir, '*')))

  S3Uploader.upload_directory(tmp_directory, 'mybucket',
  { logger: logger,
@@ -120,70 +110,83 @@ describe S3Uploader do
  gzip: true,
  gzip_working_dir: working_dir })

- #only compress files which aren't compressed yet
- compressed_files = error.select { |f| File.extname(f) != '.gz' }.map { |f| f + '.gz' }
- working_dir_content = Dir["#{working_dir}/**/*"].map { |f| f.sub(working_dir, '')[1..-1] }
-
- #expect compressed files within working_directory
- expect(working_dir_content & compressed_files).to match_array(compressed_files)
+ files = connection.directories.get('mybucket').files
+ expect(files).to have(error.size).items
+ expect(files.map(&:key)).to match_array(error.map { |f| File.extname(f) != '.gz' ? [f, '.gz'].join : f })
  end

  it 'when called with bad gzip_working_dir it should raise an exception' do
  expect {
  S3Uploader.upload_directory(tmp_directory, 'mybucket',
  { gzip: true,
- gzip_working_dir: File.join(Dir.tmpdir, 'test_s3_uploader/working_dir') })
+ gzip_working_dir: File.join(tmp_directory, 'working_dir') })
  }.to raise_error('gzip_working_dir may not be located within source-folder')

  expect {
  S3Uploader.upload_directory(tmp_directory, 'mybucket',
- { gzip: true,
+ { logger: logger,
+ connection: connection,
+ regexp: /non_matching/,
+ gzip: true,
  gzip_working_dir: File.join(Dir.tmpdir, 'test_s3_uploader_working_dir') })
- }.to raise_error('gzip_working_dir may not be located within source-folder')
+ }.to_not raise_error
+
  end

+ # Run with: rspec --tag slow
+ it 'uploads large files', :slow do
+ working_dir = File.join(Dir.tmpdir, 's3uploader_big_file_spec/working_dir')
+ big_file_dir = File.join(Dir.tmpdir, 'test_s3_uploader_big_file')
+ FileUtils.mkdir_p working_dir
+ FileUtils.mkdir_p big_file_dir
+ create_test_file(File.join(big_file_dir, 'test_big_file.dmp'), 2*1024)
+
+ S3Uploader.upload_directory(big_file_dir, 'mybucket',
+ { logger: logger,
+ connection: connection,
+ gzip: true,
+ gzip_working_dir: working_dir })
+
+ files = connection.directories.get('mybucket').files
+ expect(files.map(&:key)).to match_array([ 'test_big_file.dmp.gz' ])
+
+ FileUtils.rm_rf(Dir.glob(File.join(working_dir, '*')))
+ FileUtils.rm_rf(Dir.glob(File.join(big_file_dir, '*')))
+ end
  end

  describe 'time_range' do

  it 'should not upload any files' do
- connection = double(:connection)
- connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
- directory.stub(:files).and_return(files = double(:files))
-
- file_names = access.map { |f| "#{tmp_directory}/#{f}" }
+ file_names = access.map { |f| File.join( tmp_directory, f) }
  yesterday = Time.now - (60 * 60 * 24)
  File.utime(yesterday, yesterday, *file_names)

- files.should_not_receive(:create)
-
  S3Uploader.upload_directory(tmp_directory, 'mybucket',
  { logger: logger,
  connection: connection,
  regexp: /access/,
  time_range: (Time.now - (60 * 60 * 12))..Time.now })
+
+ files = connection.directories.get('mybucket').files
+ expect(files).to have(0).items
  end

  it 'should upload files' do
- connection = double(:connection)
- connection.stub_chain(:directories, :new).and_return(directory = double(:directory))
- directory.stub(:files).and_return(files = double(:files))
-
- file_names = access.map { |f| "#{tmp_directory}/#{f}" }
+ file_names = access.map { |f| File.join( tmp_directory, f) }
  yesterday = Time.now - (60 * 60 * 12)
  File.utime(yesterday, yesterday, *file_names)

- keys = access.dup
- files.should_receive(:create).exactly(6).times do |hash|
- expect(keys).to include(hash[:key])
- keys.delete(hash[:key])
- end

  S3Uploader.upload_directory(tmp_directory, 'mybucket',
  { logger: logger,
  connection: connection,
  regexp: /access/,
  time_range: (Time.now - (60 * 60 * 24))..Time.now })
+
+ files = connection.directories.get('mybucket').files
+ expect(files).to have(access.size).items
+ expect(files.map(&:key)).to match_array(access)
  end
  end
  end
data/spec/spec_helper.rb CHANGED
@@ -7,4 +7,13 @@ require 'open3'
  RSpec.configure do |config|
  config.color_enabled = true
  config.formatter = 'documentation'
+ config.treat_symbols_as_metadata_keys_with_true_values = true
+ end
+
+
+ def create_test_file(filename, size)
+ File.open(filename, 'w') do |f|
+ contents = "x" * (1024*1024)
+ size.to_i.times { f.write(contents) }
+ end
  end
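The new `create_test_file` helper replaces the earlier `dd` shell-out: it writes `size` blocks of 1 MiB each, so the `size` argument counts mebibytes rather than bytes (the big-file spec passes `2*1024` for a roughly 2 GiB fixture). A small usage sketch under that assumption (the directory path is illustrative):

```ruby
require 'fileutils'
require 'tmpdir'

# Build a ~5 MiB fixture file using the helper defined in spec_helper.rb.
fixture_dir = File.join(Dir.tmpdir, 's3_uploader_fixture_example') # hypothetical path
FileUtils.mkdir_p(fixture_dir)
create_test_file(File.join(fixture_dir, 'access.log'), 5)
```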
metadata CHANGED
@@ -1,55 +1,55 @@
  --- !ruby/object:Gem::Specification
  name: s3_uploader
  version: !ruby/object:Gem::Version
- version: 0.1.0
+ version: 0.1.1
  platform: ruby
  authors:
  - Christian Hein
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-04-15 00:00:00.000000000 Z
+ date: 2014-08-15 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: fog
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
  name: rspec
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - "~>"
  - !ruby/object:Gem::Version
  version: 2.14.1
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ~>
+ - - "~>"
  - !ruby/object:Gem::Version
  version: 2.14.1
  - !ruby/object:Gem::Dependency
  name: rake
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  description: S3 multithreaded directory uploader
@@ -60,7 +60,8 @@ executables:
  extensions: []
  extra_rdoc_files: []
  files:
- - .gitignore
+ - ".gitignore"
+ - ".rspec"
  - Gemfile
  - LICENSE
  - README.md
@@ -82,17 +83,17 @@ require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - ! '>='
+ - - ">="
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.1.2
+ rubygems_version: 2.2.2
  signing_key:
  specification_version: 4
  summary: S3 multithreaded directory uploader