s3reamer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f2d922c65a2453f84342e5092f94aa9dab51959c
4
+ data.tar.gz: 8f56f94a47e7330a89e5624d801c542c23818ef9
5
+ SHA512:
6
+ metadata.gz: 65d8f1fc518f44517bd1febe677d12f81ea0449456a04cf229c1e661c25eb618a28612aaae25e3332bad8563f19b4f442b6896854b4abc08e39e87d929df668e
7
+ data.tar.gz: fad7fd698d7f91f1d996527fc8e1cdc6645db27826642f1b79f04dcb98e323f1ef613b1cdce504c492ebb17be0cac529facf1da18846616c1b44c2fe713b5411
data/.gitignore ADDED
@@ -0,0 +1,38 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /vendor/bundle
26
+ /lib/bundler/man/
27
+
28
+ # for a library or gem, you might want to ignore these files since the code is
29
+ # intended to run in multiple environments; otherwise, check them in:
30
+ Gemfile.lock
31
+ .ruby-version
32
+ .ruby-gemset
33
+
34
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
35
+ .rvmrc
36
+
37
+ # IntelliJ
38
+ *.iml
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/bin/s3reamer ADDED
@@ -0,0 +1,84 @@
1
#!/usr/bin/ruby
#
# Command-line entry point for s3reamer.
#
# Usage: s3reamer DIRECTORY BUCKET_NAME [options]
#
# Validates DIRECTORY, builds an S3 bucket resource from the shared AWS
# credentials file, and hands both to S3reamer::DirectoryStreamer.

require 'optparse'
require 'logger'
require 'aws-sdk'

# The library lives in ../lib relative to this script. Put it on the load path
# so the script works from a source checkout as well as from an installed gem.
$LOAD_PATH.unshift(File.expand_path('../lib', __dir__))
require 's3reamer'

banner = "Usage: s3reamer DIRECTORY BUCKET_NAME [options]"

# Defaults. The help text below interpolates these values so the two cannot
# drift apart.
options = {
  region: 'us-east-1',
  reader_timeout: 30,
  reader_sleep_interval: 1,
  pool_size: 4,
  log_level: Logger::INFO,
  credentials_file: File.join(Dir.home, '.aws', 'credentials'),
  credentials_profile: 'default',
  prefix: ''
}

# Every switch that takes a value declares it as mandatory, so the handlers
# never receive nil.
parser = OptionParser.new do |opts|
  opts.banner = banner

  opts.on("-r", "--region AWS_REGION", String,
          "AWS region of the bucket (defaults to #{options[:region]})") do |v|
    options[:region] = v
  end

  opts.on("--file-read-timeout SECONDS", Integer,
          "Number of seconds to wait for a file to grow before timing out " \
          "(defaults to #{options[:reader_timeout]})") do |v|
    options[:reader_timeout] = v
  end

  opts.on("--reader-sleep-interval SECONDS", Integer,
          "Number of seconds to sleep after an attempted read " \
          "(defaults to #{options[:reader_sleep_interval]})") do |v|
    options[:reader_sleep_interval] = v
  end

  opts.on("-n", "--parallelism N", Integer,
          "Maximum number of concurrent files being processed " \
          "(defaults to #{options[:pool_size]})") do |v|
    options[:pool_size] = v
  end

  opts.on("-v", "--verbose", "Enable debug logging") do
    options[:log_level] = Logger::DEBUG
  end

  opts.on("-c", "--aws-credentials PATH", String,
          "Path to AWS credentials file. Defaults to ~/.aws/credentials") do |v|
    options[:credentials_file] = v
  end

  # Kept so existing invocations using the original misspelling keep working.
  opts.on("--aws-credientials PATH", String,
          "Deprecated misspelling of --aws-credentials") do |v|
    options[:credentials_file] = v
  end

  opts.on("-p", "--aws-credentials-profile PROFILE", String,
          "AWS credentials profile. Defaults to \"default\".") do |v|
    options[:credentials_profile] = v
  end

  opts.on("--s3-prefix PREFIX", String,
          "Prefix to prepend to all uploaded object keys. Defaults to empty string.") do |v|
    # A non-empty prefix is normalised to end with exactly one separator.
    options[:prefix] = v.empty? || v.end_with?("/") ? v : "#{v}/"
  end
end

# Parse options first; parse! removes them from ARGV wherever they appear, so
# the positional arguments may come before or after the switches.
begin
  parser.parse!(ARGV)
rescue OptionParser::ParseError => e
  $stderr.puts e.message
  $stderr.puts parser
  exit 1
end

if ARGV.length < 2
  $stderr.puts parser
  exit 1
end

dir, bucket_name = ARGV.shift(2)

if !File.exist?(dir)
  $stderr.puts "Directory does not exist: #{dir}"
  exit 1
elsif !File.directory?(dir)
  $stderr.puts "Not a directory: #{dir}"
  exit 1
end

options[:bucket_name] = bucket_name
options[:directory] = dir

credentials = Aws::SharedCredentials.new(path: options[:credentials_file],
                                         profile_name: options[:credentials_profile])
s3 = Aws::S3::Resource.new(region: options[:region], credentials: credentials)
bucket = s3.bucket(options[:bucket_name])

watcher = S3reamer::DirectoryStreamer.new(options)
watcher.stream_directory(directory: options[:directory], bucket: bucket)
@@ -0,0 +1,125 @@
1
require 'logger'
require 'pathname'
require 'set'

require 'aws-sdk'
require 'concurrent'
require 'rb-inotify'
require 'thread/pool'
require 'thread_safe'

require 's3reamer/s3_write_stream'
10
+
11
module S3reamer
  # Watches a directory tree with inotify and, for each file that is opened,
  # copies its contents into an S3 object through S3reamer::S3WriteStream
  # while the file is still being written.
  class DirectoryStreamer
    DEFAULT_OPTIONS = {
      pool_size: 4,
      log_level: Logger::INFO,
      reader_sleep_interval: 1,
      reader_timeout: 10
    }.freeze

    attr_reader :options

    # @param options [Hash] overrides for DEFAULT_OPTIONS. Recognised keys:
    #   :pool_size, :log_level, :reader_sleep_interval, :reader_timeout and
    #   :prefix (string prepended to every object key).
    def initialize(options = {})
      @options = DEFAULT_OPTIONS.merge(options)
      @log = Logger.new(STDOUT)
      # Read the level from the merged options so the default applies when
      # the caller does not pass :log_level.
      @log.level = @options[:log_level]
    end

    # Watches +directory+ recursively and streams every opened file into
    # +bucket+. Blocks inside the notifier's run loop.
    #
    # @param directory [String] root directory to watch
    # @param bucket [#object] bucket-like object; +object(key)+ must return
    #   something S3reamer::S3WriteStream accepts
    def stream_directory(directory:, bucket:)
      file_statuses = ThreadSafe::Hash.new
      dir_watch = INotify::Notifier.new
      pool = Thread.pool(options[:pool_size])

      dir_watch.watch(directory, :open, :close, :recursive) do |event|
        filename = event.absolute_name

        log.debug "Events #{event.flags.inspect} received for: #{filename}"

        # Don't process directories
        next unless File.exist?(filename) && !File.directory?(filename)

        if file_statuses.key?(filename)
          # A worker already owns this file: repeated "open" events are
          # ignored, and a "close" event is relayed to the worker.
          next if event.flags.include?(:open)

          if event.flags.include?(:close)
            file_statuses[filename] = :close
            next
          end
        end

        log.info "File opened: #{filename}"
        file_statuses[filename] = :open

        pool.process do
          begin
            stream_file(filename, directory, bucket, file_statuses)
          ensure
            # Always forget the file, even after a failure, so that a later
            # open of the same path is picked up again.
            file_statuses.delete(filename)
          end
        end
      end

      dir_watch.run
      pool.shutdown
    end

    private

    def log
      @log
    end

    # Uploads a single file: opens the S3 stream, copies the file until it is
    # closed or goes quiet, then completes the upload. Failures are logged and
    # not raised, so one bad file does not disturb the others.
    def stream_file(filename, directory, bucket, file_statuses)
      log.debug "Starting process for: #{filename}"

      begin
        io = S3reamer::S3WriteStream.new(bucket.object(object_key(filename, directory)))
      rescue StandardError => error
        log.error "Error initializing S3 streamer: #{error}\n#{error.backtrace.join("\n")}"
        return
      end

      log.debug "Initialized S3 streamer"

      begin
        timed_out = File.open(filename, 'rb') do |file|
          copy_until_closed(file, io, filename, file_statuses)
        end
      rescue StandardError => error
        # The upload is deliberately not completed here: finishing it would
        # publish a truncated object.
        log.error "Error streaming file #{filename}: #{error}\n#{error.backtrace.join("\n")}"
        return
      end

      if timed_out
        log.warn "No new data for #{options[:reader_timeout]}s. Completing S3 upload: #{filename}"
      else
        log.info "File closed. Completing S3 upload: #{filename}"
      end

      begin
        io.close
      rescue StandardError => error
        log.error "Error completing S3 upload: #{error}:\n#{error.backtrace.join("\n")}"
      end
    end

    # Copies everything readable from +file+ into +io+ until the watcher marks
    # the file as closed and a read returns no data, or until no new bytes
    # have appeared for options[:reader_timeout] seconds.
    #
    # @return [Boolean] true when the loop ended while the file was still
    #   marked open, i.e. because of the timeout
    def copy_until_closed(file, io, filename, file_statuses)
      last_successful_read = monotonic_now

      # Start with bytes_read != 0 to force at least one read of the file.
      # This addresses the race condition caused by files being opened and
      # closed quickly.
      bytes_read = -1

      while (file_statuses[filename] == :open || bytes_read != 0) &&
            (monotonic_now - last_successful_read) < options[:reader_timeout]
        chunk = file.read
        bytes_read = chunk.bytesize
        io.write(chunk) unless chunk.empty?

        # Any progress resets the inactivity timer.
        if bytes_read > 0
          log.debug "Read #{bytes_read} bytes: #{filename}"
          last_successful_read = monotonic_now
        end

        # Only wait for more data while the writer still has the file open;
        # once it is closed, drain the remainder without sleeping.
        sleep options[:reader_sleep_interval] if file_statuses[filename] == :open
      end

      file_statuses[filename] == :open
    end

    # Builds the object key: options[:prefix] followed by the path of
    # +filename+ relative to the watched +directory+. Both paths are expanded
    # first so a relative and an absolute spelling can be compared.
    def object_key(filename, directory)
      root = Pathname.new(File.expand_path(directory))
      relative = Pathname.new(File.expand_path(filename)).relative_path_from(root)
      "#{options[:prefix]}#{relative}"
    end

    # Monotonic timestamp in seconds, so wall-clock adjustments cannot
    # trigger or suppress the read timeout.
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end
end
@@ -0,0 +1,33 @@
1
module S3reamer
  # Write-only stream that buffers data and sends it to S3 as the parts of a
  # multipart upload. The upload is started in the constructor and finished by
  # #close.
  class S3WriteStream
    DEFAULT_OPTIONS = {
      # Buffer size, in bytes, at which a part is uploaded.
      chunk_size: 5 * 1024 * 1024
    }.freeze

    # @param object [#initiate_multipart_upload, #put] destination S3 object
    # @param options [Hash] overrides for DEFAULT_OPTIONS (:chunk_size)
    def initialize(object, options = {})
      @object = object
      @options = DEFAULT_OPTIONS.merge(options)
      @buffer = String.new
      @multipart_upload = object.initiate_multipart_upload
      # Parts are numbered locally instead of listing the remote parts before
      # every upload.
      @parts_uploaded = 0
      @closed = false
    end

    # Appends +data+ to the buffer and uploads a part once at least
    # :chunk_size bytes have accumulated.
    #
    # @param data [String] bytes to append
    # @raise [RuntimeError] if the stream has already been closed
    def write(data)
      raise RuntimeError, "Illegal state: cannot write after close." if @closed

      # Append as binary so strings with different encodings can be mixed and
      # the threshold below counts bytes, not characters.
      @buffer << data.b
      flush if @buffer.bytesize >= @options[:chunk_size]
    end

    # Uploads the buffered bytes as the next part. Does nothing when the
    # buffer is empty.
    def flush
      return if @buffer.empty?

      part_number = @parts_uploaded + 1
      @multipart_upload.part(part_number).upload(body: @buffer)

      # Only advance once the upload call returned; on failure the data stays
      # buffered under the same part number.
      @parts_uploaded = part_number
      @buffer = String.new
    end

    # Uploads any remaining bytes and completes the upload. Calling it again
    # after a successful close is a no-op.
    def close
      return if @closed

      flush

      if @parts_uploaded.zero?
        # Nothing was written. A multipart upload cannot be completed without
        # parts, so abort it and store an empty object instead.
        @multipart_upload.abort
        @object.put(body: '')
      else
        @multipart_upload.complete(compute_parts: true)
      end

      @closed = true
    end
  end
end
@@ -0,0 +1,3 @@
1
module S3reamer
  # Version of the s3reamer gem. Frozen so the shared constant cannot be
  # mutated in place.
  VERSION = '0.0.1'.freeze
end
data/lib/s3reamer.rb ADDED
@@ -0,0 +1 @@
1
+ require 's3reamer/directory_streamer'
data/s3reamer.gemspec ADDED
@@ -0,0 +1,33 @@
1
# Gem specification for s3reamer.

$LOAD_PATH.push File.expand_path('../lib', __FILE__)

require "s3reamer/version"

Gem::Specification.new do |gem|
  gem.name = 's3reamer'
  gem.version = S3reamer::VERSION

  gem.summary = "Automatically upload files to S3 as they're created"

  gem.authors = ['Christopher Mullins']
  gem.email = 'chris@sidoh.org'
  gem.homepage = 'http://github.com/sidoh/s3reamer'

  # NOTE(review): rake is declared as a runtime dependency; confirm whether it
  # is needed at runtime or should be a development dependency.
  gem.add_dependency 'rake'
  gem.add_dependency 'aws-sdk', '~> 2'
  gem.add_dependency 'rb-inotify', '~> 0.9'
  gem.add_dependency 'thread', '~> 0.2'
  gem.add_dependency 'thread_safe', '~> 0.3'
  gem.add_dependency 'concurrent-ruby', '~> 1'

  # Resolve everything relative to this file so the file list does not depend
  # on the directory the gemspec happens to be evaluated from.
  root = File.expand_path('..', __FILE__)

  # Patterns from .gitignore: blank lines and comments are dropped, and the
  # leading "/" anchor is removed because the globbed paths are relative.
  ignores = File.readlines(File.join(root, '.gitignore'))
                .map(&:strip)
                .reject { |line| line.empty? || line.start_with?('#') }
                .map { |pattern| pattern.sub(%r{\A/}, '') }

  # A path is ignored when it is an ignored entry, lies inside an ignored
  # directory, or matches an ignore glob.
  ignored = lambda do |path|
    ignores.any? do |pattern|
      entry = pattern.chomp('/')
      path == entry || path.start_with?("#{entry}/") || File.fnmatch(pattern, path)
    end
  end

  dotfiles = %w[.gitignore]

  all_files_without_ignores = Dir.chdir(root) do
    Dir["**/*"].reject { |f| File.directory?(f) || ignored.call(f) }
  end

  gem.files = (all_files_without_ignores + dotfiles).sort
  gem.executables = ["s3reamer"]

  gem.require_path = "lib"
end
metadata ADDED
@@ -0,0 +1,135 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: s3reamer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Christopher Mullins
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-03-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: aws-sdk
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '2'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rb-inotify
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.9'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.9'
55
+ - !ruby/object:Gem::Dependency
56
+ name: thread
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.2'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: thread_safe
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.3'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: concurrent-ruby
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1'
97
+ description:
98
+ email: chris@sidoh.org
99
+ executables:
100
+ - s3reamer
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - Gemfile
106
+ - bin/s3reamer
107
+ - lib/s3reamer.rb
108
+ - lib/s3reamer/directory_streamer.rb
109
+ - lib/s3reamer/s3_write_stream.rb
110
+ - lib/s3reamer/version.rb
111
+ - s3reamer.gemspec
112
+ homepage: http://github.com/sidoh/s3reamer
113
+ licenses: []
114
+ metadata: {}
115
+ post_install_message:
116
+ rdoc_options: []
117
+ require_paths:
118
+ - lib
119
+ required_ruby_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ required_rubygems_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: '0'
129
+ requirements: []
130
+ rubyforge_project:
131
+ rubygems_version: 2.5.1
132
+ signing_key:
133
+ specification_version: 4
134
+ summary: Automatically upload files to S3 as they're created
135
+ test_files: []