s3_uploader 0.1.3 → 0.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: bf2cd35df114a57623ee6363704be419cce05799
-  data.tar.gz: f5723eb35fc8d41600472d02b8bb579b6374e4f6
+  metadata.gz: 4d6399ae8ff54b55635cf668b0f6b711c907fdb1
+  data.tar.gz: b3607639a7a60dcaacdbb41963f4f7e5dddfd5bb
 SHA512:
-  metadata.gz: e55f3328934aa57f1f753e1554cdcf689b9afe943662603eeb73a6b00d226475bbdfe22cc1733e235f19a18131b99cde89d8201ac113db338a525ddd79e493c6
-  data.tar.gz: 46f61b2e5c7ee2c5709df974f4fbdce5efc44090785fb0b25932c0fa880dac620ca6820379dd8eead0fe2abfa28bc011ea02a21581ef0c502be4320f69241973
+  metadata.gz: ebb2a6aab8ddd480ea1dc3f163f92f435f0847cca276fc41dbc937848827690d44e092e4af0ac1eb52ab86c1f3a0cd3577bf6d892417eed355b03cb34b1c5391
+  data.tar.gz: 81301dba831306bed8f4de9d7323509a477540c8d5da8cc12ecdc4aed5380896f821110a4f5ee824bdce88555c6cb5836ef1608c99191893900e0379adbe1489
data/README.md CHANGED
@@ -30,7 +30,21 @@ Or install it yourself as:
 ## Usage
 
 ```ruby
-S3Uploader.upload_directory('/tmp/test', 'mybucket',
+uploader = S3Uploader::Uploader.new({
+  :s3_key => YOUR_KEY,
+  :s3_secret => YOUR_SECRET_KEY,
+  :destination_dir => 'test/',
+  :region => 'eu-west-1',
+  :threads => 10
+})
+
+uploader.upload('/tmp/test', 'mybucket')
+```
+
+or
+
+```ruby
+S3Uploader.upload('/tmp/test', 'mybucket',
   { :s3_key => YOUR_KEY,
     :s3_secret => YOUR_SECRET_KEY,
     :destination_dir => 'test/',
@@ -40,12 +54,14 @@ Or install it yourself as:
   })
 ```
 
-If no keys are provided, it uses S3_KEY and S3_SECRET environment variables. us-east-1 is the default region.
+Former static method upload_directory is still supported for backwards compatibility.
 
 ```ruby
 S3Uploader.upload_directory('/tmp/test', 'mybucket', { :destination_dir => 'test/', :threads => 4 })
 ```
 
+If no keys are provided, it uses S3_KEY and S3_SECRET environment variables. us-east-1 is the default region.
+
 Metadata headers are documented [here](http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html)
 
 Or as a command line binary
@@ -72,10 +88,6 @@ directory is required in this case.
   })
 ```
 
-## TODO
-
-1. Add optional time, size and number of files uploaded report at end of process
-
 ## Contributing
 
 1. Fork it
@@ -91,6 +103,7 @@ directory is required in this case.
 * [Philip Cunningham](https://github.com/unsymbol)
 * [Ludwig Bratke](https://github.com/bratke)
 * [John Pignata](https://github.com/jpignata)
+* [eperezks](https://github.com/eperezks)
 
 ## License
 
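
As a quick illustration of the 0.2.0 usage described in the README diff above, here is a minimal sketch (not part of the diff itself). It assumes the documented S3_KEY and S3_SECRET environment variables are set and relies on the default us-east-1 region; the source directory and destination prefix below are hypothetical.

```ruby
# Minimal sketch of the new instance-based API (not taken from the diff).
# Assumes S3_KEY / S3_SECRET are exported, per the README; paths are hypothetical.
require 's3_uploader'

uploader = S3Uploader::Uploader.new({
  :destination_dir => 'logs/',   # hypothetical destination prefix
  :threads         => 5          # matches the DEFAULT_THREADS_NUMBER default
})

uploader.upload('/var/log/myapp', 'mybucket')   # hypothetical source directory
```
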
@@ -1,135 +1,159 @@
 module S3Uploader
   KILO_SIZE = 1024.0
   BLOCK_SIZE = 1024 * 1024
+  DEFAULT_THREADS_NUMBER = 5
+  DEFAULT_AWS_REGION = 'us-east-1'
+
+  def self.upload(source, bucket, options = {})
+    Uploader.new(options).upload(source, bucket)
+  end
 
   def self.upload_directory(source, bucket, options = {})
-    options = {
-      :destination_dir => '',
-      :threads => 5,
-      :s3_key => ENV['S3_KEY'],
-      :s3_secret => ENV['S3_SECRET'],
-      :public => false,
-      :region => 'us-east-1',
-      :metadata => {},
-      :path_style => false,
-      :regexp => /.*/,
-      :gzip => false,
-      :gzip_working_dir => nil,
-      :time_range => Time.at(0)..(Time.now + (60 * 60 * 24))
-    }.merge(options)
-
-    log = options[:logger] || Logger.new(STDOUT)
-
-    raise 'Source must be a directory' unless File.directory?(source)
-
-
-    if options[:gzip]
-      if options[:gzip_working_dir].nil?
+    self.upload(source, bucket, options)
+  end
+
+  class Uploader
+    attr_writer :logger
+
+    def initialize(options = {})
+
+      @options = {
+        :destination_dir => '',
+        :threads => DEFAULT_THREADS_NUMBER,
+        :s3_key => ENV['S3_KEY'],
+        :s3_secret => ENV['S3_SECRET'],
+        :public => false,
+        :region => DEFAULT_AWS_REGION,
+        :metadata => {},
+        :path_style => false,
+        :regexp => nil,
+        :gzip => false,
+        :gzip_working_dir => nil,
+        :time_range => Time.at(0)..(Time.now + (60 * 60 * 24))
+      }.merge(options)
+
+      @logger = @options[:logger] || Logger.new(STDOUT)
+
+      if @options[:gzip] && @options[:gzip_working_dir].nil?
         raise 'gzip_working_dir required when using gzip'
-      else
-        source_dir = source.end_with?('/') ? source : [ source, '/'].join
-        gzip_working_dir = options[:gzip_working_dir].end_with?('/') ?
-          options[:gzip_working_dir] : [ options[:gzip_working_dir], '/'].join
+      end
 
-        if gzip_working_dir.start_with?(source_dir)
-          raise 'gzip_working_dir may not be located within source-folder'
+      if @options[:connection]
+        @connection = @options[:connection]
+      else
+        if @options[:s3_key].nil? || @options[:s3_secret].nil?
+          raise "Missing access keys"
         end
+
+        @connection = Fog::Storage.new({
+          :provider => 'AWS',
+          :aws_access_key_id => @options[:s3_key],
+          :aws_secret_access_key => @options[:s3_secret],
+          :region => @options[:region],
+          :path_style => @options[:path_style]
+        })
       end
 
-      options[:gzip_working_dir] = options[:gzip_working_dir].chop if options[:gzip_working_dir].end_with?('/')
+      if !@options[:destination_dir].to_s.empty? &&
+         !@options[:destination_dir].end_with?('/')
+        @options[:destination_dir] << '/'
+      end
     end
 
+    def upload(source, bucket)
+      raise 'Source directory is requiered' if source.to_s.empty?
+      source << '/' unless source.end_with?('/')
+      raise 'Source must be a directory' unless File.directory?(source)
 
-    if options[:connection]
-      connection = options[:connection]
-    else
-      raise "Missing access keys" if options[:s3_key].nil? || options[:s3_secret].nil?
+      gzip_working_dir = @options[:gzip_working_dir]
 
-      connection = Fog::Storage.new({
-        :provider => 'AWS',
-        :aws_access_key_id => options[:s3_key],
-        :aws_secret_access_key => options[:s3_secret],
-        :region => options[:region],
-        :path_style => options[:path_style]
-      })
-    end
+      if @options[:gzip] && !gzip_working_dir.to_s.empty?
+        gzip_working_dir << '/' unless gzip_working_dir.end_with?('/')
 
-    source = source.chop if source.end_with?('/')
+        if gzip_working_dir.start_with?(source)
+          raise 'gzip_working_dir may not be located within source-folder'
+        end
+      end
 
-    if options[:destination_dir] != '' && !options[:destination_dir].end_with?('/')
-      options[:destination_dir] = "#{options[:destination_dir]}/"
-    end
-    total_size = 0
-    files = Queue.new
-
-    Dir.glob("#{source}/**/*").select { |f| !File.directory?(f) }.each do |f|
-      if File.basename(f).match(options[:regexp]) && options[:time_range].cover?(File.mtime(f))
-        if options[:gzip] && File.extname(f) != '.gz'
-          dir, base = File.split(f)
-          dir = dir.sub(source, options[:gzip_working_dir])
-          gz_file = "#{dir}/#{base}.gz"
-
-          FileUtils.mkdir_p(dir) unless File.directory?(dir)
-          Zlib::GzipWriter.open(gz_file) do |gz|
-            gz.mtime = File.mtime(f)
-            gz.orig_name = f
-
-            File.open(f, 'rb') do |fi|
-              while (block_in = fi.read(BLOCK_SIZE)) do
-                gz.write block_in
+      total_size = 0
+      files = Queue.new
+      regexp = @options[:regexp]
+      Dir.glob(File.join(source, '**/*'))
+         .select { |f| !File.directory?(f) }.each do |f|
+
+        if (regexp.nil? || File.basename(f).match(regexp)) &&
+           @options[:time_range].cover?(File.mtime(f))
+          if @options[:gzip] && File.extname(f) != '.gz'
+            dir, base = File.split(f)
+            dir = dir.sub(source, gzip_working_dir)
+            gz_file = File.join(dir, [ base, '.gz' ].join)
+
+            @logger.info("Compressing #{f}")
+
+            FileUtils.mkdir_p(dir)
+            Zlib::GzipWriter.open(gz_file) do |gz|
+              gz.mtime = File.mtime(f)
+              gz.orig_name = f
+
+              File.open(f, 'rb') do |fi|
+                while (block_in = fi.read(BLOCK_SIZE)) do
+                  gz.write block_in
+                end
               end
             end
-          end
 
-          files << gz_file
-          total_size += File.size(gz_file)
-        else
-          files << f
-          total_size += File.size(f)
+            files << gz_file
+            total_size += File.size(gz_file)
+          else
+            files << f
+            total_size += File.size(f)
+          end
         end
       end
-    end
 
-    directory = connection.directories.new(:key => bucket)
+      directory = @connection.directories.new(:key => bucket)
 
-    start = Time.now
-    total_files = files.size
-    file_number = 0
-    @mutex = Mutex.new
+      start = Time.now
+      total_files = files.size
+      file_number = 0
+      @mutex = Mutex.new
 
-    threads = []
-    options[:threads].times do |i|
-      threads[i] = Thread.new do
+      threads = []
+      @options[:threads].times do |i|
+        threads[i] = Thread.new do
 
-        until files.empty?
-          @mutex.synchronize do
-            file_number += 1
-            Thread.current["file_number"] = file_number
-          end
-          file = files.pop rescue nil
-          if file
-            key = file.gsub(source, '').gsub(options[:gzip_working_dir].to_s, '')[1..-1]
-            dest = "#{options[:destination_dir]}#{key}"
-            body = File.open(file)
-            log.info("[#{Thread.current["file_number"]}/#{total_files}] Uploading #{key} to s3://#{bucket}/#{dest}")
-
-            directory.files.create(
-              :key => dest,
-              :body => body,
-              :public => options[:public],
-              :metadata => options[:metadata]
-            )
-            body.close
+          until files.empty?
+            @mutex.synchronize do
+              file_number += 1
+              Thread.current["file_number"] = file_number
+            end
+            file = files.pop rescue nil
+            if file
+              key = file.sub(source, '').sub(gzip_working_dir.to_s, '')
+              dest = [ @options[:destination_dir], key ].join
+              body = File.open(file)
+              @logger.info(["[", Thread.current["file_number"], "/",
+                            total_files, "] Uploading ", key,
+                            " to s3://#{bucket}/#{dest}" ].join)
+
+              directory.files.create(
+                :key => dest,
+                :body => body,
+                :public => @options[:public],
+                :metadata => @options[:metadata]
+              )
+              body.close
+            end
           end
         end
       end
-    end
-    threads.each { |t| t.join }
-
-    finish = Time.now
-    elapsed = finish.to_f - start.to_f
-    mins, secs = elapsed.divmod 60.0
-    log.info("Uploaded %d (%.#{0}f KB) in %d:%04.2f" % [total_files, total_size / KILO_SIZE, mins.to_i, secs])
+      threads.each { |t| t.join }
 
+      finish = Time.now
+      elapsed = finish.to_f - start.to_f
+      mins, secs = elapsed.divmod 60.0
+      @logger.info("Uploaded %d (%.#{0}f KB) in %d:%04.2f" %
+                   [total_files, total_size / KILO_SIZE, mins.to_i, secs])
+    end
   end
 end
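
For reference, the refactored code above also keeps the option of injecting an existing Fog connection instead of passing raw keys (the `@options[:connection]` branch). A rough sketch of that usage follows; the region and paths are taken from the README example rather than prescribed by the gem.

```ruby
# Sketch only: reuse an existing Fog::Storage connection so the Uploader
# skips key-based construction (the :connection branch shown above).
require 'fog/aws'
require 's3_uploader'

connection = Fog::Storage.new(
  :provider              => 'AWS',
  :aws_access_key_id     => ENV['S3_KEY'],
  :aws_secret_access_key => ENV['S3_SECRET'],
  :region                => 'eu-west-1'   # region borrowed from the README example
)

S3Uploader.upload('/tmp/test', 'mybucket',
                  :connection      => connection,
                  :destination_dir => 'test/')
```
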
@@ -1,3 +1,3 @@
 module S3Uploader
-  VERSION = "0.1.3"
+  VERSION = "0.2.0"
 end
@@ -2,15 +2,16 @@ require 'spec_helper'
 
 describe S3Uploader do
 
+  let(:tmp_directory) do
+    File.join(Dir.tmpdir, 'test_s3_uploader')
+  end
   let(:access) do
-    %w(access.log access.log.1 access.log.2.gz subdir/access.log subdir/access.log.1 subdir/access.log.2.gz)
+    %w(access.log access.log.1 access.log.2.gz subdir/access.log subdir/access.log.1 subdir/access.log.2.gz) +
+      [ File.join('subdirX', tmp_directory, 'somefile-access.txt')]
   end
   let(:error) do
     %w(error.log error.log.1 error.log.2.gz subdir/error.log subdir/error.log.1 subdir/error.log.2.gz)
   end
-  let(:tmp_directory) do
-    File.join(Dir.tmpdir, 'test_s3_uploader')
-  end
   let(:logger) do
     Logger.new(STDOUT)
   end
@@ -45,7 +46,7 @@ describe S3Uploader do
 
   it 'when called with missing access keys it should raise an exception' do
     lambda {
-      S3Uploader.upload_directory('/tmp', 'mybucket',
+      S3Uploader.upload('/tmp', 'mybucket',
                                   { destination_dir: 'test1/',
                                     s3_key: nil,
                                     s3_secret: nil })
@@ -54,13 +55,26 @@ describe S3Uploader do
 
   it 'when called with source not directory it should raise an exception' do
     lambda {
-      S3Uploader.upload_directory('/xzzaz1232', 'mybucket')
+      S3Uploader.upload('/xzzaz1232', 'mybucket')
     }.should raise_error('Source must be a directory')
   end
 
   it 'should upload all files in a directory' do
     connection.directories.get('mybucket', prefix: 'test1/').files.empty?.should be_true
 
+    S3Uploader.upload(tmp_directory, 'mybucket',
+                      { destination_dir: 'test1/',
+                        logger: logger,
+                        connection: connection })
+
+    files = connection.directories.get('mybucket', prefix: 'test1/').files
+    expect(files).to have((access + error).size).items
+    expect(files.map(&:key)).to match_array((access + error).map { |f| File.join('test1/', f) })
+  end
+
+  it 'should still support upload_directory static method for backwards compatibility' do
+    connection.directories.get('mybucket', prefix: 'test1/').files.empty?.should be_true
+
     S3Uploader.upload_directory(tmp_directory, 'mybucket',
                                 { destination_dir: 'test1/',
                                   logger: logger,
@@ -75,7 +89,7 @@ describe S3Uploader do
 
   it 'should upload specific files' do
 
-    S3Uploader.upload_directory(tmp_directory, 'mybucket',
+    S3Uploader.upload(tmp_directory, 'mybucket',
                                 { logger: logger,
                                   connection: connection,
                                   regexp: /access/ })
@@ -91,7 +105,7 @@ describe S3Uploader do
 
   it "should require a gzip working directory" do
     lambda {
-      S3Uploader.upload_directory('/tmp', 'mybucket',
+      S3Uploader.upload('/tmp', 'mybucket',
                                   { logger: logger,
                                     connection: connection,
                                     gzip: true })
@@ -103,7 +117,7 @@ describe S3Uploader do
     FileUtils.mkdir_p working_dir
     FileUtils.rm_rf(Dir.glob(File.join(working_dir, '*')))
 
-    S3Uploader.upload_directory(tmp_directory, 'mybucket',
+    S3Uploader.upload(tmp_directory, 'mybucket',
                                 { logger: logger,
                                   connection: connection,
                                   regexp: /error/,
@@ -117,13 +131,13 @@ describe S3Uploader do
 
   it 'when called with bad gzip_working_dir it should raise an exception' do
     expect {
-      S3Uploader.upload_directory(tmp_directory, 'mybucket',
+      S3Uploader.upload(tmp_directory, 'mybucket',
                                   { gzip: true,
                                     gzip_working_dir: File.join(tmp_directory, 'working_dir') })
     }.to raise_error('gzip_working_dir may not be located within source-folder')
 
     expect {
-      S3Uploader.upload_directory(tmp_directory, 'mybucket',
+      S3Uploader.upload(tmp_directory, 'mybucket',
                                   { logger: logger,
                                     connection: connection,
                                     regexp: /non_matching/,
@@ -141,7 +155,7 @@ describe S3Uploader do
     FileUtils.mkdir_p big_file_dir
     create_test_file(File.join(big_file_dir, 'test_big_file.dmp'), 2*1024)
 
-    S3Uploader.upload_directory(big_file_dir, 'mybucket',
+    S3Uploader.upload(big_file_dir, 'mybucket',
                                 { logger: logger,
                                   connection: connection,
                                   gzip: true,
@@ -162,7 +176,7 @@ describe S3Uploader do
     yesterday = Time.now - (60 * 60 * 24)
     File.utime(yesterday, yesterday, *file_names)
 
-    S3Uploader.upload_directory(tmp_directory, 'mybucket',
+    S3Uploader.upload(tmp_directory, 'mybucket',
                                 { logger: logger,
                                   connection: connection,
                                   regexp: /access/,
@@ -178,7 +192,7 @@ describe S3Uploader do
     File.utime(yesterday, yesterday, *file_names)
 
 
-    S3Uploader.upload_directory(tmp_directory, 'mybucket',
+    S3Uploader.upload(tmp_directory, 'mybucket',
                                 { logger: logger,
                                   connection: connection,
                                   regexp: /access/,
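
One behavioural detail the specs above rely on: in 0.2.0 the :regexp option defaults to nil (match everything) rather than /.*/, and passing a Regexp restricts uploads by file basename. A hedged sketch of that filter, assuming a Fog connection built elsewhere:

```ruby
# Sketch: restrict an upload to files whose basename matches /access/,
# mirroring the filtering used in the specs above. `connection` is assumed
# to be an existing Fog::Storage object; omit it to fall back to S3_KEY/S3_SECRET.
S3Uploader.upload('/tmp/test', 'mybucket',
                  { :regexp     => /access/,
                    :connection => connection })
```
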
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: s3_uploader
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.2.0
 platform: ruby
 authors:
 - Christian Hein
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-02-01 00:00:00.000000000 Z
+date: 2016-02-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fog-aws