s3reamer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f2d922c65a2453f84342e5092f94aa9dab51959c
4
+ data.tar.gz: 8f56f94a47e7330a89e5624d801c542c23818ef9
5
+ SHA512:
6
+ metadata.gz: 65d8f1fc518f44517bd1febe677d12f81ea0449456a04cf229c1e661c25eb618a28612aaae25e3332bad8563f19b4f442b6896854b4abc08e39e87d929df668e
7
+ data.tar.gz: fad7fd698d7f91f1d996527fc8e1cdc6645db27826642f1b79f04dcb98e323f1ef613b1cdce504c492ebb17be0cac529facf1da18846616c1b44c2fe713b5411
data/.gitignore ADDED
@@ -0,0 +1,38 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /vendor/bundle
26
+ /lib/bundler/man/
27
+
28
+ # for a library or gem, you might want to ignore these files since the code is
29
+ # intended to run in multiple environments; otherwise, check them in:
30
+ Gemfile.lock
31
+ .ruby-version
32
+ .ruby-gemset
33
+
34
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
35
+ .rvmrc
36
+
37
+ # IntelliJ
38
+ *.iml
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Gems are resolved from the public RubyGems index.
source "https://rubygems.org"

# The dependency list itself lives in the gemspec; pull it in from there.
gemspec
data/bin/s3reamer ADDED
@@ -0,0 +1,84 @@
1
#!/usr/bin/env ruby
#
# Command-line entry point for s3reamer.
#
# Usage: s3reamer DIRECTORY BUCKET_NAME [options]
#
# Parses the command line, builds an S3 bucket handle from the shared AWS
# credentials file, and hands both to S3reamer::DirectoryStreamer, which
# watches DIRECTORY and uploads files to BUCKET_NAME.

require 'optparse'
require 'logger'
require 'aws-sdk'

# lib/s3reamer.rb pulls in S3reamer::DirectoryStreamer. The library lives under
# lib/, not next to this script, so it is loaded through the load path.
require 's3reamer'

banner = "Usage: s3reamer DIRECTORY BUCKET_NAME [options]"

options = {
  region: 'us-east-1',
  reader_timeout: 30,
  reader_sleep_interval: 1,
  pool_size: 4,
  log_level: Logger::INFO,
  # Dir.home avoids a NoMethodError on nil when HOME is not set.
  credentials_file: File.join(Dir.home, '.aws', 'credentials'),
  credentials_profile: 'default',
  prefix: ''
}

# Every switch below takes an OPTIONAL argument, so the block may receive nil
# when the flag is given without a value. In that case the default is kept.
parser = OptionParser.new do |opts|
  opts.banner = banner

  opts.on("-r", "--region [AWS_REGION]") do |v|
    options[:region] = v unless v.nil?
  end

  opts.on("--file-read-timeout [SECONDS]", Integer,
          "Number of seconds to wait for a file to grow before timing out (defaults to 30)") do |v|
    options[:reader_timeout] = v unless v.nil?
  end

  opts.on("--reader-sleep-interval [SECONDS]", Integer,
          "Number of seconds to sleep after an attempted read (defaults to 1)") do |v|
    options[:reader_sleep_interval] = v unless v.nil?
  end

  opts.on("-n", "--parallelism [N]", Integer,
          "Maximum number of concurrent files being processed (defaults to 4)") do |v|
    options[:pool_size] = v unless v.nil?
  end

  opts.on("-v", "--verbose") do
    options[:log_level] = Logger::DEBUG
  end

  # "--aws-credientials" is the original (misspelled) switch name; it is kept
  # as an alias so existing invocations continue to work.
  opts.on("-c", "--aws-credentials [PATH]", "--aws-credientials", String,
          "Path to AWS credentials file. Defaults to ~/.aws/credentials") do |v|
    options[:credentials_file] = v unless v.nil?
  end

  opts.on("-p", "--aws-credentials-profile [PROFILE]", String,
          "AWS credentials profile. Defaults to \"default\".") do |v|
    options[:credentials_profile] = v unless v.nil?
  end

  opts.on("--s3-prefix [PREFIX]", String,
          "Prefix to prepend to the key of every uploaded file. Defaults to empty string.") do |v|
    # A missing or empty value means "no prefix"; otherwise make sure the
    # prefix ends with exactly the separator the keys are joined on.
    next if v.nil? || v.empty?
    options[:prefix] = v.end_with?("/") ? v : "#{v}/"
  end
end

# Parse switches BEFORE consuming the positional arguments: parse! strips the
# switches out of ARGV wherever they appear, so "--help" works on its own and
# a leading flag is never mistaken for DIRECTORY.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  $stderr.puts e.message
  $stderr.puts banner
  exit 1
end

if ARGV.length < 2
  $stderr.puts banner
  exit 1
end

dir = ARGV.shift
if !File.exist?(dir)
  $stderr.puts "Directory does not exist: #{dir}"
  exit 1
elsif !File.directory?(dir)
  $stderr.puts "Not a directory: #{dir}"
  exit 1
end

bucket_name = ARGV.shift

options[:bucket_name] = bucket_name
options[:directory] = dir

credentials = Aws::SharedCredentials.new(path: options[:credentials_file], profile_name: options[:credentials_profile])
s3 = Aws::S3::Resource.new(region: options[:region], credentials: credentials)
bucket = s3.bucket(options[:bucket_name])
watcher = S3reamer::DirectoryStreamer.new(options)
watcher.stream_directory(directory: options[:directory], bucket: bucket)
@@ -0,0 +1,125 @@
1
+ require 'set'
2
+ require 'logger'
3
+ require 'rb-inotify'
4
+ require 'aws-sdk'
5
+ require 'thread/pool'
6
+ require 'concurrent'
7
+ require 'thread_safe'
8
+
9
+ require 's3reamer/s3_write_stream'
10
+
11
# Pathname is used below to turn a watched file's path into an S3 object key.
require 'pathname'

module S3reamer
  # Watches a directory with inotify and streams every file that is opened
  # inside it to an S3 bucket through S3reamer::S3WriteStream.
  #
  # Recognised options (see DEFAULT_OPTIONS for defaults):
  #   :pool_size             - size passed to Thread.pool for the worker pool
  #   :log_level             - Logger severity for the STDOUT logger
  #   :reader_sleep_interval - seconds to sleep between reads of an open file
  #   :reader_timeout        - seconds without new bytes before a file is given up on
  #   :prefix                - string prepended to every S3 object key (optional)
  class DirectoryStreamer
    DEFAULT_OPTIONS = {
      pool_size: 4,
      log_level: Logger::INFO,
      reader_sleep_interval: 1,
      reader_timeout: 10
    }.freeze

    # @return [Hash] the caller's options merged over DEFAULT_OPTIONS
    attr_reader :options

    # @param options [Hash] overrides for DEFAULT_OPTIONS; unknown keys are kept as-is
    def initialize(options = {})
      @options = DEFAULT_OPTIONS.merge(options)
      @log = Logger.new(STDOUT)
      # Use the merged hash so the default level applies when the caller does
      # not pass :log_level (the raw argument would yield nil here).
      @log.level = @options[:log_level]
    end

    # Watches +directory+ (recursively) for open/close events and uploads each
    # newly seen file to +bucket+. Does not return until the notifier's run
    # loop ends; the worker pool is shut down afterwards.
    #
    # @param directory [String] directory to watch
    # @param bucket [#object] bucket handle; +object(key)+ must return something
    #   S3WriteStream accepts (presumably an Aws::S3::Bucket - confirm with caller)
    def stream_directory(directory:, bucket:)
      file_statuses = ThreadSafe::Hash.new
      dir_watch = INotify::Notifier.new
      pool = Thread.pool(options[:pool_size])

      dir_watch.watch(directory, :open, :close, :recursive) do |event|
        filename = event.absolute_name

        log.debug "Events #{event.flags.inspect} received for: #{filename}"

        # Don't process directories (or paths that no longer exist).
        next unless File.exist?(filename) && !File.directory?(filename)

        # If this is an "open" event, we should only process it if we haven't
        # already started on this file.
        next if event.flags.include?(:open) && file_statuses.key?(filename)

        # If this is a "close" event, we should update the status to inform the
        # worker thread.
        if event.flags.include?(:close) && file_statuses.key?(filename)
          file_statuses[filename] = :close
          next
        end

        log.info "File opened: #{filename}"
        file_statuses[filename] = :open

        pool.process do
          begin
            stream_file(filename, directory, bucket, file_statuses)
          ensure
            # Always forget the file, even when the worker failed, so a later
            # event for the same path is not ignored forever.
            file_statuses.delete(filename)
          end
        end
      end

      dir_watch.run
      pool.shutdown
    end

    private

    def log
      @log
    end

    # Uploads one file: opens the S3 stream, copies the file's contents while
    # it is being written, then completes the upload. Failures while completing
    # the upload are logged and not re-raised.
    def stream_file(filename, directory, bucket, file_statuses)
      log.debug "Starting process for: #{filename}"

      io = open_write_stream(filename, directory, bucket)
      log.debug "Initialized S3 streamer"

      # File.open (not Kernel#open) so a watched name beginning with "|" can
      # never be run as a command; binary mode so bytes are passed through
      # without any encoding interpretation.
      File.open(filename, 'rb') do |file|
        copy_until_closed(file, io, filename, file_statuses)
        log.info "File closed. Completing S3 upload: #{filename}"
      end

      begin
        io.close
      rescue StandardError => e
        log.error "Error completing S3 upload: #{e}:\n#{e.backtrace.join("\n")}"
      end
    end

    # Builds the S3 object key for +filename+: its path relative to the watched
    # +directory+, prefixed with options[:prefix] (empty when unset).
    def object_key_for(filename, directory)
      relative = Pathname.new(filename).relative_path_from(Pathname.new(directory))
      "#{options[:prefix]}#{relative}"
    end

    # Creates the S3WriteStream for +filename+. Any error is logged and then
    # re-raised to the caller.
    def open_write_stream(filename, directory, bucket)
      S3reamer::S3WriteStream.new(bucket.object(object_key_for(filename, directory)))
    rescue StandardError => e
      log.error "Error initializing S3 streamer: #{e}\n#{e.backtrace.join("\n")}"
      raise
    end

    # Repeatedly reads whatever is new in +file+ and writes it to +io+.
    #
    # Keeps going until the file has been closed (its status is no longer
    # :open) and a read returned no bytes, or until no new bytes have been seen
    # for options[:reader_timeout] seconds.
    def copy_until_closed(file, io, filename, file_statuses)
      last_successful_read = Time.now

      # Start with bytes_read != 0 to force at least one read of the file.
      # This addresses the race condition caused by files being opened and
      # closed quickly.
      bytes_read = -1

      while (file_statuses[filename] == :open || bytes_read != 0) &&
            (last_successful_read + options[:reader_timeout]) > Time.now

        chunk = file.read
        bytes_read = chunk.bytesize
        io.write(chunk)

        # If we read any bytes, reset the time at which we last saw new bytes
        # in the file. This prevents the read timeout condition from triggering.
        if bytes_read > 0
          log.debug "Read #{bytes_read} bytes: #{filename}"
          last_successful_read = Time.now
        end

        # Only wait for more data while the writer still has the file open.
        sleep options[:reader_sleep_interval] if file_statuses[filename] == :open
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
module S3reamer
  # A write-only stream that buffers data in memory and uploads it to S3 as the
  # parts of a multipart upload.
  #
  # The wrapped object only needs to respond to +initiate_multipart_upload+,
  # returning something that responds to +part(number)+ (whose result responds
  # to +upload(body:)+) and +complete(compute_parts:)+.
  class S3WriteStream
    DEFAULT_OPTIONS = {
      # Buffered bytes that trigger a part upload.
      chunk_size: 5 * 1024 * 1024
    }

    # Starts a multipart upload on +object+.
    #
    # @param object [#initiate_multipart_upload] the destination S3 object
    # @param options [Hash] :chunk_size - bytes to buffer before uploading a part
    def initialize(object, options = {})
      # String.new yields an empty binary (ASCII-8BIT) string.
      @buffer = String.new
      @options = DEFAULT_OPTIONS.merge(options)
      @multipart_upload = object.initiate_multipart_upload
      @parts_uploaded = 0
      @closed = false
    end

    # Appends +data+ to the buffer and uploads a part once at least
    # :chunk_size bytes are buffered.
    #
    # @param data [String] bytes to write
    # @raise [RuntimeError] if the stream has already been closed
    def write(data)
      raise RuntimeError, "Illegal state: cannot write after close." if @closed

      # Append as raw bytes: the buffer stays binary regardless of the
      # encoding of each write, so mixing UTF-8 and binary data cannot raise
      # Encoding::CompatibilityError.
      @buffer << data.b
      # bytesize, not length: the threshold is a byte count and multi-byte
      # characters would otherwise be under-counted.
      flush if @buffer.bytesize >= @options[:chunk_size]
    end

    # Uploads the current buffer as the next part and empties the buffer.
    def flush
      # Part numbers are tracked locally rather than by re-listing the
      # upload's parts on every flush.
      part = @multipart_upload.part(@parts_uploaded + 1)
      part.upload(body: @buffer)
      # Count the part only after the upload call returned without raising.
      @parts_uploaded += 1
      @buffer.clear
    end

    # Uploads any remaining buffered data and completes the multipart upload.
    # Calling close again after a successful close is a no-op.
    def close
      return if @closed

      flush unless @buffer.empty?
      @multipart_upload.complete(compute_parts: true)
      @closed = true
    end
  end
end
@@ -0,0 +1,3 @@
1
module S3reamer
  # Version of the s3reamer gem. Frozen so the constant's value cannot be
  # mutated in place at runtime.
  VERSION = '0.0.1'.freeze
end
data/lib/s3reamer.rb ADDED
@@ -0,0 +1 @@
1
+ require 's3reamer/directory_streamer'
data/s3reamer.gemspec ADDED
@@ -0,0 +1,33 @@
1
# Make lib/ requirable so the version constant can be loaded below.
$LOAD_PATH.push(File.expand_path('../lib', __FILE__))

require "s3reamer/version"

Gem::Specification.new do |spec|
  spec.name    = 's3reamer'
  spec.version = S3reamer::VERSION

  spec.summary = "Automatically upload files to S3 as they're created"

  spec.authors  = ['Christopher Mullins']
  spec.email    = 'chris@sidoh.org'
  spec.homepage = 'http://github.com/sidoh/s3reamer'

  spec.add_dependency 'rake'
  spec.add_dependency 'aws-sdk', '~> 2'
  spec.add_dependency 'rb-inotify', '~> 0.9'
  spec.add_dependency 'thread', '~> 0.2'
  spec.add_dependency 'thread_safe', '~> 0.3'
  spec.add_dependency 'concurrent-ruby', '~> 1'

  # Every non-blank line of .gitignore is used as an fnmatch pattern.
  ignore_patterns = File.readlines(".gitignore")
                        .select { |line| line =~ /\S+/ }
                        .map { |line| line.chomp }

  # Package every regular file under the current directory that matches none
  # of the ignore patterns. Dir["**/*"] does not return dotfiles, so
  # .gitignore is added explicitly.
  packaged = Dir["**/*"].reject do |path|
    next true if File.directory?(path)

    ignore_patterns.any? { |pattern| File.fnmatch(pattern, path) }
  end
  packaged += %w[.gitignore]

  spec.files        = packaged.sort
  spec.executables  = ["s3reamer"]

  spec.require_path = "lib"
end
metadata ADDED
@@ -0,0 +1,135 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: s3reamer
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Christopher Mullins
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-03-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: aws-sdk
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '2'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rb-inotify
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.9'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.9'
55
+ - !ruby/object:Gem::Dependency
56
+ name: thread
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.2'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: thread_safe
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.3'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: concurrent-ruby
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1'
97
+ description:
98
+ email: chris@sidoh.org
99
+ executables:
100
+ - s3reamer
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - Gemfile
106
+ - bin/s3reamer
107
+ - lib/s3reamer.rb
108
+ - lib/s3reamer/directory_streamer.rb
109
+ - lib/s3reamer/s3_write_stream.rb
110
+ - lib/s3reamer/version.rb
111
+ - s3reamer.gemspec
112
+ homepage: http://github.com/sidoh/s3reamer
113
+ licenses: []
114
+ metadata: {}
115
+ post_install_message:
116
+ rdoc_options: []
117
+ require_paths:
118
+ - lib
119
+ required_ruby_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ required_rubygems_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: '0'
129
+ requirements: []
130
+ rubyforge_project:
131
+ rubygems_version: 2.5.1
132
+ signing_key:
133
+ specification_version: 4
134
+ summary: Automatically upload files to S3 as they're created
135
+ test_files: []