logstash-output-s3 3.2.0 → 4.0.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/lib/logstash/outputs/s3.rb +188 -308
- data/lib/logstash/outputs/s3/file_repository.rb +120 -0
- data/lib/logstash/outputs/s3/patch.rb +22 -0
- data/lib/logstash/outputs/s3/path_validator.rb +18 -0
- data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
- data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
- data/lib/logstash/outputs/s3/temporary_file_factory.rb +123 -0
- data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/uploader.rb +59 -0
- data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
- data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +49 -0
- data/logstash-output-s3.gemspec +2 -2
- data/spec/integration/dynamic_prefix_spec.rb +92 -0
- data/spec/integration/gzip_file_spec.rb +62 -0
- data/spec/integration/gzip_size_rotation_spec.rb +63 -0
- data/spec/integration/restore_from_crash_spec.rb +39 -0
- data/spec/integration/size_rotation_spec.rb +59 -0
- data/spec/integration/stress_test_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_stale_write_spec.rb +60 -0
- data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
- data/spec/outputs/s3/file_repository_spec.rb +146 -0
- data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
- data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
- data/spec/outputs/s3/temporary_file_factory_spec.rb +85 -0
- data/spec/outputs/s3/temporary_file_spec.rb +40 -0
- data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
- data/spec/outputs/s3/uploader_spec.rb +57 -0
- data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
- data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +38 -0
- data/spec/outputs/s3_spec.rb +52 -335
- data/spec/spec_helper.rb +6 -0
- data/spec/supports/helpers.rb +33 -9
- metadata +65 -4
- data/spec/integration/s3_spec.rb +0 -97

data/lib/logstash/outputs/s3/file_repository.rb
@@ -0,0 +1,120 @@
+# encoding: utf-8
+require "java"
+require "concurrent"
+require "concurrent/timer_task"
+require "logstash/util"
+
+ConcurrentHashMap = java.util.concurrent.ConcurrentHashMap
+
+module LogStash
+  module Outputs
+    class S3
+      class FileRepository
+        DEFAULT_STATE_SWEEPER_INTERVAL_SECS = 60
+        DEFAULT_STALE_TIME_SECS = 15 * 60
+        # Ensure that all access or work done
+        # on a factory is threadsafe
+        class PrefixedValue
+          def initialize(file_factory, stale_time)
+            @file_factory = file_factory
+            @lock = Mutex.new
+            @stale_time = stale_time
+          end
+
+          def with_lock
+            @lock.synchronize {
+              yield @file_factory
+            }
+          end
+
+          def stale?
+            with_lock { |factory| factory.current.size == 0 && (Time.now - factory.current.ctime > @stale_time) }
+          end
+
+          def apply(prefix)
+            return self
+          end
+
+          def delete!
+            with_lock { |factory| factory.current.delete! }
+          end
+        end
+
+        class FactoryInitializer
+          def initialize(tags, encoding, temporary_directory, stale_time)
+            @tags = tags
+            @encoding = encoding
+            @temporary_directory = temporary_directory
+            @stale_time = stale_time
+          end
+
+          def apply(prefix_key)
+            PrefixedValue.new(TemporaryFileFactory.new(prefix_key, @tags, @encoding, @temporary_directory), @stale_time)
+          end
+        end
+
+        def initialize(tags, encoding, temporary_directory,
+                       stale_time = DEFAULT_STALE_TIME_SECS,
+                       sweeper_interval = DEFAULT_STATE_SWEEPER_INTERVAL_SECS)
+          # The path needs to contain the prefix, so when we start
+          # logstash after a crash we keep the remote structure
+          @prefixed_factories = ConcurrentHashMap.new
+
+          @sweeper_interval = sweeper_interval
+
+          @factory_initializer = FactoryInitializer.new(tags, encoding, temporary_directory, stale_time)
+
+          start_stale_sweeper
+        end
+
+        def keys
+          @prefixed_factories.keySet
+        end
+
+        def each_files
+          @prefixed_factories.elements.each do |prefixed_file|
+            prefixed_file.with_lock { |factory| yield factory.current }
+          end
+        end
+
+        # Return the file factory
+        def get_factory(prefix_key)
+          @prefixed_factories.computeIfAbsent(prefix_key, @factory_initializer).with_lock { |factory| yield factory }
+        end
+
+        def get_file(prefix_key)
+          get_factory(prefix_key) { |factory| yield factory.current }
+        end
+
+        def shutdown
+          stop_stale_sweeper
+        end
+
+        def size
+          @prefixed_factories.size
+        end
+
+        def remove_stale(k, v)
+          if v.stale?
+            @prefixed_factories.remove(k, v)
+            v.delete!
+          end
+        end
+
+        def start_stale_sweeper
+          @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
+            LogStash::Util.set_thread_name("S3, Stale factory sweeper")
+
+            @prefixed_factories.forEach { |k, v| remove_stale(k, v) }
+          end
+
+          @stale_sweeper.execute
+        end
+
+        def stop_stale_sweeper
+          @stale_sweeper.shutdown
+        end
+      end
+    end
+  end
+end
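For context, a minimal sketch of how this repository might be driven. It assumes JRuby with the plugin's files on the load path; the tags, encoding, and directory values are illustrative, not taken from the diff:

    require "logstash/outputs/s3/temporary_file"
    require "logstash/outputs/s3/temporary_file_factory"
    require "logstash/outputs/s3/file_repository"

    repository = LogStash::Outputs::S3::FileRepository.new([], "none", "/tmp/logstash-s3")

    # One TemporaryFileFactory (and one open file) is kept per prefix key;
    # concurrent writers for the same prefix serialize on the PrefixedValue mutex.
    repository.get_file("logs/2016/") { |file| file.write("hello world\n") }

    repository.size     # => 1
    repository.shutdown # stops the stale-factory sweeper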

data/lib/logstash/outputs/s3/patch.rb
@@ -0,0 +1,22 @@
+# This is a patch related to autoloading and ruby
+#
+# The fix exists in jruby 9k but not in the current jruby; not sure when or if it will be backported
+# https://github.com/jruby/jruby/issues/3645
+#
+# AWS is doing tricky name discovery in the module to generate the correct error class and
+# this strategy is bogus in jruby, and `eager_autoload` doesn't fix this issue.
+#
+# This will be a short-lived patch since AWS is removing the need for it.
+# see: https://github.com/aws/aws-sdk-ruby/issues/1301#issuecomment-261115960
+old_stderr = $stderr
+
+$stderr = StringIO.new
+begin
+  module Aws
+    const_set(:S3, Aws::S3)
+  end
+ensure
+  $stderr = old_stderr
+end
+
+

data/lib/logstash/outputs/s3/path_validator.rb
@@ -0,0 +1,18 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class PathValidator
+        INVALID_CHARACTERS = "\^`><"
+
+        def self.valid?(name)
+          name.match(matches_re).nil?
+        end
+
+        def self.matches_re
+          /[#{Regexp.escape(INVALID_CHARACTERS)}]/
+        end
+      end
+    end
+  end
+end
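The validator rejects any name containing one of four characters; note that "\^" in a double-quoted Ruby string collapses to a plain "^", so the rejected set is ^, `, >, and <. A quick illustration (the prefixes are made up):

    LogStash::Outputs::S3::PathValidator.valid?("logs/%{type}/") # => true
    LogStash::Outputs::S3::PathValidator.valid?("logs/a>b/")     # => false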

data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb
@@ -0,0 +1,24 @@
+# encoding: utf-8
+require "logstash/outputs/s3/size_rotation_policy"
+require "logstash/outputs/s3/time_rotation_policy"
+
+module LogStash
+  module Outputs
+    class S3
+      class SizeAndTimeRotationPolicy
+        def initialize(file_size, time_file)
+          @size_strategy = SizeRotationPolicy.new(file_size)
+          @time_strategy = TimeRotationPolicy.new(time_file)
+        end
+
+        def rotate?(file)
+          @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
+        end
+
+        def needs_periodic?
+          true
+        end
+      end
+    end
+  end
+end
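A short sketch of the composed policy; the 1024-byte and 5-minute thresholds are made up, and `file` stands for a TemporaryFile:

    policy = LogStash::Outputs::S3::SizeAndTimeRotationPolicy.new(1024, 5)
    policy.rotate?(file)   # true as soon as either wrapped policy fires
    policy.needs_periodic? # => true: time can elapse without any write, so a timer must poll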

data/lib/logstash/outputs/s3/size_rotation_policy.rb
@@ -0,0 +1,26 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class SizeRotationPolicy
+        attr_reader :size_file
+
+        def initialize(size_file)
+          if size_file <= 0
+            raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
+          end
+
+          @size_file = size_file
+        end
+
+        def rotate?(file)
+          file.size >= size_file
+        end
+
+        def needs_periodic?
+          false
+        end
+      end
+    end
+  end
+end
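Usage mirrors the combined policy above (threshold value illustrative):

    policy = LogStash::Outputs::S3::SizeRotationPolicy.new(1024)
    policy.rotate?(file)   # => true once file.size reaches 1024 bytes
    policy.needs_periodic? # => false: size only changes on a write, so no timer is needed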

data/lib/logstash/outputs/s3/temporary_file.rb
@@ -0,0 +1,71 @@
+# encoding: utf-8
+require "thread"
+require "forwardable"
+require "fileutils"
+
+module LogStash
+  module Outputs
+    class S3
+      # Wraps the actual file descriptor in a utility class.
+      # It makes things more OOP and easier to reason about the paths.
+      class TemporaryFile
+        extend Forwardable
+
+        def_delegators :@fd, :path, :write, :close, :fsync
+
+        attr_reader :fd
+
+        def initialize(key, fd, temp_path)
+          @fd = fd
+          @key = key
+          @temp_path = temp_path
+          @created_at = Time.now
+        end
+
+        def ctime
+          @created_at
+        end
+
+        def temp_path
+          @temp_path
+        end
+
+        def size
+          # Use the fd size to get an accurate result,
+          # so we don't have to deal with fsync;
+          # if the file is closed we use File::size
+          begin
+            @fd.size
+          rescue IOError
+            ::File.size(path)
+          end
+        end
+
+        def key
+          @key.gsub(/^\//, "")
+        end
+
+        # Each temporary file is created inside a directory named with a UUID;
+        # instead of deleting the file directly and risking the deletion of other files,
+        # we delete the UUID root. Using a UUID also removes the risk of deleting an
+        # unwanted file; it acts as a sandbox.
+        def delete!
+          @fd.close
+          ::FileUtils.rm_rf(@temp_path, :secure => true)
+        end
+
+        def empty?
+          size == 0
+        end
+
+        def self.create_from_existing_file(file_path, temporary_folder)
+          key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
+
+          TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
+                            ::File.open(file_path, "r"),
+                            ::File.join(temporary_folder, key_parts.slice(0, 1)))
+        end
+      end
+    end
+  end
+end
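`create_from_existing_file` is the crash-recovery path: it rebuilds the S3 key from a leftover file by stripping the leading UUID segment. A hedged sketch, assuming a leftover part file exists under the temporary folder (the glob and paths are illustrative, and the base folder is passed as a Pathname as `relative_path_from` expects):

    require "pathname"
    require "logstash/outputs/s3/temporary_file"

    folder   = Pathname.new("/tmp/logstash-s3")
    leftover = Dir.glob(::File.join(folder, "**", "*.txt")).first
    # e.g. /tmp/logstash-s3/<uuid>/logs/ls.s3.<uuid>.2016-12-01T10.15.part0.txt

    file = LogStash::Outputs::S3::TemporaryFile.create_from_existing_file(leftover, folder)
    file.key # => "logs/ls.s3.<uuid>.2016-12-01T10.15.part0.txt" (UUID segment stripped)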

data/lib/logstash/outputs/s3/temporary_file_factory.rb
@@ -0,0 +1,123 @@
+# encoding: utf-8
+require "socket"
+require "securerandom"
+require "fileutils"
+require "zlib"
+require "forwardable"
+
+module LogStash
+  module Outputs
+    class S3
+      # Since the file can contain dynamic parts, we have to handle a more local structure to
+      # allow a nice recovery from a crash.
+      #
+      # The local structure will look like this:
+      #
+      # <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%m.tag_es_fb.part1.txt.gz
+      #
+      # Since the UUID should be fairly unique, I can destroy the whole path when an upload is complete.
+      # I do not have to mess around checking whether the other directories have files in them before destroying them.
+      class TemporaryFileFactory
+        FILE_MODE = "a"
+        GZIP_ENCODING = "gzip"
+        GZIP_EXTENSION = "txt.gz"
+        TXT_EXTENSION = "txt"
+        STRFTIME = "%Y-%m-%dT%H.%M"
+
+        attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current
+
+        def initialize(prefix, tags, encoding, temporary_directory)
+          @counter = 0
+          @prefix = prefix
+
+          @tags = tags
+          @encoding = encoding
+          @temporary_directory = temporary_directory
+          @lock = Mutex.new
+
+          rotate!
+        end
+
+        def rotate!
+          @lock.synchronize {
+            @current = new_file
+            increment_counter
+            @current
+          }
+        end
+
+        private
+        def extension
+          gzip? ? GZIP_EXTENSION : TXT_EXTENSION
+        end
+
+        def gzip?
+          encoding == GZIP_ENCODING
+        end
+
+        def increment_counter
+          @counter += 1
+        end
+
+        def current_time
+          Time.now.strftime(STRFTIME)
+        end
+
+        def generate_name
+          filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"
+
+          if tags.size > 0
+            "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
+          else
+            "#{filename}.part#{counter}.#{extension}"
+          end
+        end
+
+        def new_file
+          uuid = SecureRandom.uuid
+          name = generate_name
+          path = ::File.join(temporary_directory, uuid)
+          key = ::File.join(prefix, name)
+
+          FileUtils.mkdir_p(::File.join(path, prefix))
+
+          io = if gzip?
+                 # We have to use this wrapper because we cannot access the size of the
+                 # file directly on the gzip writer.
+                 IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
+               else
+                 ::File.open(::File.join(path, key), FILE_MODE)
+               end
+
+          TemporaryFile.new(key, io, path)
+        end
+
+        class IOWrappedGzip
+          extend Forwardable
+
+          def_delegators :@gzip_writer, :write, :close
+          attr_reader :file_io, :gzip_writer
+
+          def initialize(file_io)
+            @file_io = file_io
+            @gzip_writer = Zlib::GzipWriter.open(file_io)
+          end
+
+          def path
+            @gzip_writer.to_io.path
+          end
+
+          def size
+            # flush to get the current file size
+            @gzip_writer.flush
+            @gzip_writer.to_io.size
+          end
+
+          def fsync
+            @gzip_writer.to_io.fsync
+          end
+        end
+      end
+    end
+  end
+end
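A sketch of the factory lifecycle (prefix, tags, and directory are illustrative):

    require "logstash/outputs/s3/temporary_file"
    require "logstash/outputs/s3/temporary_file_factory"

    factory = LogStash::Outputs::S3::TemporaryFileFactory.new("logs/", ["es", "fb"], "gzip", "/tmp/logstash-s3")
    factory.current.key # e.g. "logs/ls.s3.<uuid>.2016-12-01T10.15.tag_es.fb.part0.txt.gz"

    previous = factory.current
    factory.rotate!     # opens a fresh ".part1" file under a new UUID directory;
                        # uploading and deleting `previous` remains the caller's job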

data/lib/logstash/outputs/s3/time_rotation_policy.rb
@@ -0,0 +1,26 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class TimeRotationPolicy
+        attr_reader :time_file
+
+        def initialize(time_file)
+          if time_file <= 0
+            raise LogStash::ConfigurationError, "`time_file` needs to be greater than 0"
+          end
+
+          @time_file = time_file * 60
+        end
+
+        def rotate?(file)
+          file.size > 0 && (Time.now - file.ctime) >= time_file
+        end
+
+        def needs_periodic?
+          true
+        end
+      end
+    end
+  end
+end
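Note the interplay with empty files (the 15-minute value is illustrative):

    # time_file is given in minutes and converted to seconds by the constructor.
    policy = LogStash::Outputs::S3::TimeRotationPolicy.new(15)
    policy.rotate?(file) # true only when the file is non-empty AND at least 900s old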

data/lib/logstash/outputs/s3/uploader.rb
@@ -0,0 +1,59 @@
+# encoding: utf-8
+require "logstash/util"
+require "aws-sdk"
+
+module LogStash
+  module Outputs
+    class S3
+      class Uploader
+        TIME_BEFORE_RETRYING_SECONDS = 1
+        DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
+          :min_threads => 1,
+          :max_threads => 8,
+          :max_queue => 1,
+          :fallback_policy => :caller_runs
+        })
+
+
+        attr_reader :bucket, :upload_options, :logger
+
+        def initialize(bucket, logger, threadpool = DEFAULT_THREADPOOL)
+          @bucket = bucket
+          @workers_pool = threadpool
+          @logger = logger
+        end
+
+        def upload_async(file, options = {})
+          @workers_pool.post do
+            LogStash::Util.set_thread_name("S3 output uploader, file: #{file.path}")
+            upload(file, options)
+          end
+        end
+
+        def upload(file, options = {})
+          upload_options = options.fetch(:upload_options, {})
+
+          begin
+            obj = bucket.object(file.key)
+            obj.upload_file(file.path, upload_options)
+          rescue => e
+            # When we get here it usually means that S3 already retried by itself (default is 3 retries).
+            # When the retry limit is reached, or another error happens, we will wait and retry.
+            #
+            # The thread might be stuck here, but I think it's better than losing anything;
+            # it's either a transient error or something bad really happened.
+            logger.error("Uploading failed, retrying", :exception => e, :path => file.path, :backtrace => e.backtrace)
+            retry
+          end
+
+          options[:on_complete].call(file) unless options[:on_complete].nil?
+        end
+
+        def stop
+          @workers_pool.shutdown
+          @workers_pool.wait_for_termination(nil) # block until it's done
+        end
+      end
+    end
+  end
+end
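Finally, a hedged sketch of driving the uploader through the aws-sdk v2 resource API. The bucket name, region, and logger are placeholders, and `file` stands for a TemporaryFile; none of these values come from the diff:

    require "aws-sdk"
    require "logstash/outputs/s3/uploader"

    bucket = Aws::S3::Bucket.new("my-archive", :client => Aws::S3::Client.new(:region => "us-east-1"))
    logger = Cabin::Channel.get(LogStash) # stand-in; the plugin passes its own @logger

    uploader = LogStash::Outputs::S3::Uploader.new(bucket, logger)
    uploader.upload_async(file,
                          :on_complete    => ->(f) { f.delete! },
                          :upload_options => { :server_side_encryption => "AES256" })
    uploader.stop # drains the thread pool; blocks until pending uploads finish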