s3reamer 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +38 -0
- data/Gemfile +3 -0
- data/bin/s3reamer +84 -0
- data/lib/s3reamer/directory_streamer.rb +125 -0
- data/lib/s3reamer/s3_write_stream.rb +33 -0
- data/lib/s3reamer/version.rb +3 -0
- data/lib/s3reamer.rb +1 -0
- data/s3reamer.gemspec +33 -0
- metadata +135 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f2d922c65a2453f84342e5092f94aa9dab51959c
|
4
|
+
data.tar.gz: 8f56f94a47e7330a89e5624d801c542c23818ef9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 65d8f1fc518f44517bd1febe677d12f81ea0449456a04cf229c1e661c25eb618a28612aaae25e3332bad8563f19b4f442b6896854b4abc08e39e87d929df668e
|
7
|
+
data.tar.gz: fad7fd698d7f91f1d996527fc8e1cdc6645db27826642f1b79f04dcb98e323f1ef613b1cdce504c492ebb17be0cac529facf1da18846616c1b44c2fe713b5411
|
data/.gitignore
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/test/tmp/
|
9
|
+
/test/version_tmp/
|
10
|
+
/tmp/
|
11
|
+
|
12
|
+
## Specific to RubyMotion:
|
13
|
+
.dat*
|
14
|
+
.repl_history
|
15
|
+
build/
|
16
|
+
|
17
|
+
## Documentation cache and generated files:
|
18
|
+
/.yardoc/
|
19
|
+
/_yardoc/
|
20
|
+
/doc/
|
21
|
+
/rdoc/
|
22
|
+
|
23
|
+
## Environment normalisation:
|
24
|
+
/.bundle/
|
25
|
+
/vendor/bundle
|
26
|
+
/lib/bundler/man/
|
27
|
+
|
28
|
+
# for a library or gem, you might want to ignore these files since the code is
|
29
|
+
# intended to run in multiple environments; otherwise, check them in:
|
30
|
+
Gemfile.lock
|
31
|
+
.ruby-version
|
32
|
+
.ruby-gemset
|
33
|
+
|
34
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
35
|
+
.rvmrc
|
36
|
+
|
37
|
+
# IntelliJ
|
38
|
+
*.iml
|
data/Gemfile
ADDED
data/bin/s3reamer
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/ruby

# Command-line entry point: watches DIRECTORY and streams every file that is
# opened inside it to the S3 bucket BUCKET_NAME.
#
# Usage: s3reamer DIRECTORY BUCKET_NAME [options]
#
# The two positional arguments must come first; options follow them.

require 'optparse'
require 'logger'
require 'aws-sdk'

# The streamer lives under lib/, not next to this executable. Put lib/ on the
# load path so the script also works from a source checkout (an installed gem
# already has it there).
lib_dir = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 's3reamer/directory_streamer'

banner = "Usage: s3reamer DIRECTORY BUCKET_NAME [options]"
if ARGV.length < 2
  $stderr.puts banner
  exit 1
end

dir = ARGV.shift
if !File.exist?(dir)
  $stderr.puts "Directory does not exist: #{dir}"
  exit 1
elsif !File.directory?(dir)
  $stderr.puts "Not a directory: #{dir}"
  exit 1
end

bucket_name = ARGV.shift

options = {
  region: 'us-east-1',
  bucket_name: bucket_name,
  directory: dir,
  reader_timeout: 30,
  reader_sleep_interval: 1,
  pool_size: 4,
  log_level: Logger::INFO,
  credentials_file: File.join(Dir.home, '.aws', 'credentials'),
  credentials_profile: 'default',
  prefix: ''
}

# Every switch takes an optional argument, so the block receives nil when the
# value is left out. In that case the default above is kept.
parser = OptionParser.new do |opts|
  opts.banner = banner

  opts.on("-r", "--region [AWS_REGION]") do |v|
    options[:region] = v if v
  end

  opts.on("--file-read-timeout [SECONDS]", Integer,
          "Number of seconds to wait for a file to grow before timing out (defaults to 30)") do |v|
    options[:reader_timeout] = v if v
  end

  opts.on("--reader-sleep-interval [SECONDS]", Integer,
          "Number of seconds to sleep after an attempted read (defaults to 1)") do |v|
    options[:reader_sleep_interval] = v if v
  end

  opts.on("-n", "--parallelism [N]", Integer,
          "Maximum number of concurrent files being processed (defaults to 4)") do |v|
    options[:pool_size] = v if v
  end

  opts.on("-v", "--verbose") do
    options[:log_level] = Logger::DEBUG
  end

  # "--aws-credientials" is the historical (misspelled) name and is kept as an
  # alias. Without the correctly spelled switch, "--aws-credentials" would be
  # treated as an abbreviation of "--aws-credentials-profile".
  opts.on("-c", "--aws-credentials [PATH]", "--aws-credientials", String,
          "Path to AWS credentials file. Defaults to ~/.aws/credentials") do |v|
    options[:credentials_file] = v if v
  end

  opts.on("-p", "--aws-credentials-profile [PROFILE]", String,
          "AWS credentials profile. Defaults to \"default\".") do |v|
    options[:credentials_profile] = v if v
  end

  opts.on("--s3-prefix [PREFIX]", String,
          "Prefix to prepend to all uploaded files. Defaults to empty string.") do |v|
    prefix = v.to_s
    # Make a non-empty prefix behave like a folder name.
    prefix = "#{prefix}/" unless prefix.empty? || prefix.end_with?("/")
    options[:prefix] = prefix
  end
end

begin
  parser.parse!
rescue OptionParser::ParseError => e
  $stderr.puts e.message
  $stderr.puts parser
  exit 1
end

credentials = Aws::SharedCredentials.new(path: options[:credentials_file],
                                         profile_name: options[:credentials_profile])
s3 = Aws::S3::Resource.new(region: options[:region], credentials: credentials)
bucket = s3.bucket(options[:bucket_name])

watcher = S3reamer::DirectoryStreamer.new(options)
watcher.stream_directory(directory: options[:directory], bucket: bucket)
|
@@ -0,0 +1,125 @@
|
|
1
|
+
require 'set'
require 'logger'
require 'pathname'

require 'rb-inotify'
require 'aws-sdk'
require 'thread/pool'
require 'concurrent'
require 'thread_safe'

require 's3reamer/s3_write_stream'
|
10
|
+
|
11
|
+
module S3reamer
  # Watches a directory tree with inotify and, for every file that gets
  # opened, streams its contents to an S3 object while the file is still being
  # written.
  class DirectoryStreamer
    # Defaults merged underneath the options passed to #initialize.
    DEFAULT_OPTIONS = {
      pool_size: 4,
      log_level: Logger::INFO,
      reader_sleep_interval: 1,
      reader_timeout: 10
    }

    attr_reader :options

    # @param options [Hash] overrides for DEFAULT_OPTIONS; an optional
    #   :prefix entry is prepended to every S3 object key
    def initialize(options = {})
      @options = DEFAULT_OPTIONS.merge(options)
      @log = Logger.new(STDOUT)
      # Read the level from the merged options so the default applies when the
      # caller does not pass :log_level.
      @log.level = @options[:log_level]
    end

    # Watches +directory+ recursively and uploads opened files to +bucket+.
    # Blocks in the notifier's run loop.
    #
    # @param directory [String] directory to watch
    # @param bucket [#object] bucket-like object; +bucket.object(key)+ must
    #   return something S3WriteStream accepts
    def stream_directory(directory:, bucket:)
      file_statuses = ThreadSafe::Hash.new
      dir_watch = INotify::Notifier.new
      pool = Thread.pool(options[:pool_size])

      dir_watch.watch(directory, :open, :close, :recursive) do |event|
        filename = event.absolute_name

        log.debug "Events #{event.flags.inspect} received for: #{filename}"

        # Don't process directories
        next unless File.exist?(filename) && !File.directory?(filename)

        # If this is an "open" event, we should only process it if we haven't
        # already started on this file.
        next if event.flags.include?(:open) && file_statuses.include?(filename)

        # If this is a "close" event, we should update the status to inform
        # the worker thread
        if event.flags.include?(:close) && file_statuses.include?(filename)
          file_statuses[filename] = :close
          next
        end

        log.info "File opened: #{filename}"
        file_statuses[filename] = :open

        pool.process { upload_file(filename, directory, bucket, file_statuses) }
      end

      dir_watch.run
      pool.shutdown
    end

    private

    def log
      @log
    end

    # Worker body: streams one file to S3 and always clears its status entry,
    # even on failure, so a later open event can retry the file.
    def upload_file(filename, directory, bucket, file_statuses)
      log.debug "Starting process for: #{filename}"
      io = open_stream(bucket, filename, directory)
      log.debug "Initialized S3 streamer"

      begin
        # Binary mode: the bytes are shipped to S3 untouched.
        File.open(filename, 'rb') do |file|
          copy_until_closed(file, io, filename, file_statuses)
          log.info "File closed. Completing S3 upload: #{filename}"
        end
      rescue StandardError => err
        log.error "Error streaming file: #{err}\n#{err.backtrace.join("\n")}"
        raise
      end

      finish_upload(io)
    ensure
      file_statuses.delete(filename)
    end

    # Builds the S3 object key: the configured prefix followed by the file's
    # path relative to the watched directory.
    def object_key(filename, directory)
      relative = Pathname.new(filename).relative_path_from(Pathname.new(directory))
      "#{options[:prefix]}#{relative}"
    end

    # Creates the S3 write stream for +filename+, logging and re-raising on failure.
    def open_stream(bucket, filename, directory)
      S3reamer::S3WriteStream.new(bucket.object(object_key(filename, directory)))
    rescue StandardError => err
      log.error "Error initializing S3 streamer: #{err}\n#{err.backtrace.join("\n")}"
      raise
    end

    # Copies +file+ into +io+ until the file has been closed and fully read,
    # or until no new bytes have appeared for options[:reader_timeout] seconds.
    def copy_until_closed(file, io, filename, file_statuses)
      last_successful_read = Time.now

      # Start with bytes_read != 0 to force at least one read of the file.
      # This addresses the race condition caused by files being opened and
      # closed quickly.
      bytes_read = -1

      while (file_statuses[filename] == :open || bytes_read != 0) &&
            (last_successful_read + options[:reader_timeout]) > Time.now
        chunk = file.read
        bytes_read = chunk.bytesize
        io.write(chunk)

        # Seeing new bytes resets the read-timeout clock.
        if bytes_read > 0
          log.debug "Read #{bytes_read} bytes: #{filename}"
          last_successful_read = Time.now
        end

        # Only pause while the writer still has the file open.
        sleep options[:reader_sleep_interval] if file_statuses[filename] == :open
      end
    end

    # Completes the upload; failures are logged and not re-raised.
    def finish_upload(io)
      io.close
    rescue StandardError => err
      log.error "Error completing S3 upload: #{err}:\n#{err.backtrace.join("\n")}"
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module S3reamer
  # Write-only stream that buffers data in memory and ships it to S3 as the
  # parts of a multipart upload.
  class S3WriteStream
    # chunk_size: number of buffered bytes that triggers a part upload.
    DEFAULT_OPTIONS = {
      chunk_size: 5 * 1024 * 1024
    }.freeze

    # @param object [#initiate_multipart_upload, #put] S3 object to write to;
    #   the multipart upload is started immediately
    # @param options [Hash] overrides for DEFAULT_OPTIONS
    def initialize(object, options = {})
      @object = object
      @buffer = String.new # String.new with no arguments is a binary string
      @options = DEFAULT_OPTIONS.merge(options)
      @multipart_upload = object.initiate_multipart_upload
      @parts_uploaded = 0
      @closed = false
    end

    # Appends +data+ to the buffer and uploads a part once the buffer holds
    # at least :chunk_size bytes.
    #
    # @param data [String]
    # @raise [RuntimeError] if the stream has already been closed
    def write(data)
      raise RuntimeError.new("Illegal state: cannot write after close.") if @closed

      # Append as raw bytes so chunks in different encodings can never clash.
      @buffer << data.b
      flush if @buffer.bytesize >= @options[:chunk_size]
    end

    # Uploads the buffered bytes as the next part. Does nothing when the
    # buffer is empty.
    def flush
      return if @buffer.empty?

      # Number parts locally rather than asking the upload for its parts.
      part = @multipart_upload.part(@parts_uploaded + 1)
      part.upload(body: @buffer)
      @parts_uploaded += 1
      @buffer.clear
    end

    # Uploads any remaining bytes and finalizes the object. Calling it again
    # after a successful close is a no-op.
    def close
      return if @closed

      flush
      if @parts_uploaded.zero?
        # Nothing was ever written, so there are no parts to complete.
        # Drop the upload and store an empty object instead.
        @multipart_upload.abort
        @object.put(body: '')
      else
        @multipart_upload.complete(compute_parts: true)
      end
      @closed = true
    end
  end
end
|
data/lib/s3reamer.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 's3reamer/directory_streamer'
|
data/s3reamer.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# Gem specification for s3reamer.
$:.push File.expand_path('../lib', __FILE__)

require "s3reamer/version"

Gem::Specification.new do |gem|
  gem.name = 's3reamer'
  gem.version = S3reamer::VERSION

  gem.summary = "Automatically upload files to S3 as they're created"

  gem.authors = ['Christopher Mullins']
  gem.email = 'chris@sidoh.org'
  gem.homepage = 'http://github.com/sidoh/s3reamer'

  gem.add_dependency 'rake'
  gem.add_dependency 'aws-sdk', '~> 2'
  gem.add_dependency 'rb-inotify', '~> 0.9'
  gem.add_dependency 'thread', '~> 0.2'
  gem.add_dependency 'thread_safe', '~> 0.3'
  gem.add_dependency 'concurrent-ruby', '~> 1'

  # Build the file list relative to this gemspec's directory so the result
  # does not depend on the directory the gem is built from.
  Dir.chdir(File.expand_path('..', __FILE__)) do
    # Non-blank .gitignore lines are used as fnmatch patterns; comment lines
    # are not patterns, so skip them.
    ignores = File.readlines(".gitignore")
                  .map(&:chomp)
                  .reject { |line| line.strip.empty? || line.start_with?('#') }
    dotfiles = %w[.gitignore]

    all_files_without_ignores = Dir["**/*"].reject { |f|
      File.directory?(f) || ignores.any? { |i| File.fnmatch(i, f) }
    }

    gem.files = (all_files_without_ignores + dotfiles).sort
  end

  gem.executables = ["s3reamer"]

  gem.require_path = "lib"
end
|
metadata
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: s3reamer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Christopher Mullins
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-03-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: aws-sdk
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rb-inotify
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.9'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.9'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: thread
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0.2'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0.2'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: thread_safe
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.3'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.3'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: concurrent-ruby
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1'
|
97
|
+
description:
|
98
|
+
email: chris@sidoh.org
|
99
|
+
executables:
|
100
|
+
- s3reamer
|
101
|
+
extensions: []
|
102
|
+
extra_rdoc_files: []
|
103
|
+
files:
|
104
|
+
- ".gitignore"
|
105
|
+
- Gemfile
|
106
|
+
- bin/s3reamer
|
107
|
+
- lib/s3reamer.rb
|
108
|
+
- lib/s3reamer/directory_streamer.rb
|
109
|
+
- lib/s3reamer/s3_write_stream.rb
|
110
|
+
- lib/s3reamer/version.rb
|
111
|
+
- s3reamer.gemspec
|
112
|
+
homepage: http://github.com/sidoh/s3reamer
|
113
|
+
licenses: []
|
114
|
+
metadata: {}
|
115
|
+
post_install_message:
|
116
|
+
rdoc_options: []
|
117
|
+
require_paths:
|
118
|
+
- lib
|
119
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
|
+
requirements:
|
126
|
+
- - ">="
|
127
|
+
- !ruby/object:Gem::Version
|
128
|
+
version: '0'
|
129
|
+
requirements: []
|
130
|
+
rubyforge_project:
|
131
|
+
rubygems_version: 2.5.1
|
132
|
+
signing_key:
|
133
|
+
specification_version: 4
|
134
|
+
summary: Automatically upload files to S3 as they're created
|
135
|
+
test_files: []
|