logstash-output-s3 3.2.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/lib/logstash/outputs/s3.rb +188 -308
- data/lib/logstash/outputs/s3/file_repository.rb +120 -0
- data/lib/logstash/outputs/s3/patch.rb +22 -0
- data/lib/logstash/outputs/s3/path_validator.rb +18 -0
- data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
- data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
- data/lib/logstash/outputs/s3/temporary_file_factory.rb +123 -0
- data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/uploader.rb +59 -0
- data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
- data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +49 -0
- data/logstash-output-s3.gemspec +2 -2
- data/spec/integration/dynamic_prefix_spec.rb +92 -0
- data/spec/integration/gzip_file_spec.rb +62 -0
- data/spec/integration/gzip_size_rotation_spec.rb +63 -0
- data/spec/integration/restore_from_crash_spec.rb +39 -0
- data/spec/integration/size_rotation_spec.rb +59 -0
- data/spec/integration/stress_test_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
- data/spec/integration/time_based_rotation_with_stale_write_spec.rb +60 -0
- data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
- data/spec/outputs/s3/file_repository_spec.rb +146 -0
- data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
- data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
- data/spec/outputs/s3/temporary_file_factory_spec.rb +85 -0
- data/spec/outputs/s3/temporary_file_spec.rb +40 -0
- data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
- data/spec/outputs/s3/uploader_spec.rb +57 -0
- data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
- data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +38 -0
- data/spec/outputs/s3_spec.rb +52 -335
- data/spec/spec_helper.rb +6 -0
- data/spec/supports/helpers.rb +33 -9
- metadata +65 -4
- data/spec/integration/s3_spec.rb +0 -97
data/lib/logstash/outputs/s3/file_repository.rb
@@ -0,0 +1,120 @@
+# encoding: utf-8
+require "java"
+require "concurrent"
+require "concurrent/timer_task"
+require "logstash/util"
+
+ConcurrentHashMap = java.util.concurrent.ConcurrentHashMap
+
+module LogStash
+  module Outputs
+    class S3
+      class FileRepository
+        DEFAULT_STATE_SWEEPER_INTERVAL_SECS = 60
+        DEFAULT_STALE_TIME_SECS = 15 * 60
+        # Ensure that all access or work done
+        # on a factory is threadsafe
+        class PrefixedValue
+          def initialize(file_factory, stale_time)
+            @file_factory = file_factory
+            @lock = Mutex.new
+            @stale_time = stale_time
+          end
+
+          def with_lock
+            @lock.synchronize {
+              yield @file_factory
+            }
+          end
+
+          def stale?
+            with_lock { |factory| factory.current.size == 0 && (Time.now - factory.current.ctime > @stale_time) }
+          end
+
+          def apply(prefix)
+            return self
+          end
+
+          def delete!
+            with_lock { |factory| factory.current.delete! }
+          end
+        end
+
+        class FactoryInitializer
+          def initialize(tags, encoding, temporary_directory, stale_time)
+            @tags = tags
+            @encoding = encoding
+            @temporary_directory = temporary_directory
+            @stale_time = stale_time
+          end
+
+          def apply(prefix_key)
+            PrefixedValue.new(TemporaryFileFactory.new(prefix_key, @tags, @encoding, @temporary_directory), @stale_time)
+          end
+        end
+
+        def initialize(tags, encoding, temporary_directory,
+                       stale_time = DEFAULT_STALE_TIME_SECS,
+                       sweeper_interval = DEFAULT_STATE_SWEEPER_INTERVAL_SECS)
+          # The path needs to contain the prefix so that when we restart
+          # logstash after a crash we keep the remote structure
+          @prefixed_factories = ConcurrentHashMap.new
+
+          @sweeper_interval = sweeper_interval
+
+          @factory_initializer = FactoryInitializer.new(tags, encoding, temporary_directory, stale_time)
+
+          start_stale_sweeper
+        end
+
+        def keys
+          @prefixed_factories.keySet
+        end
+
+        def each_files
+          @prefixed_factories.elements.each do |prefixed_file|
+            prefixed_file.with_lock { |factory| yield factory.current }
+          end
+        end
+
+        # Return the file factory
+        def get_factory(prefix_key)
+          @prefixed_factories.computeIfAbsent(prefix_key, @factory_initializer).with_lock { |factory| yield factory }
+        end
+
+        def get_file(prefix_key)
+          get_factory(prefix_key) { |factory| yield factory.current }
+        end
+
+        def shutdown
+          stop_stale_sweeper
+        end
+
+        def size
+          @prefixed_factories.size
+        end
+
+        def remove_stale(k, v)
+          if v.stale?
+            @prefixed_factories.remove(k, v)
+            v.delete!
+          end
+        end
+
+        def start_stale_sweeper
+          @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
+            LogStash::Util.set_thread_name("S3, Stale factory sweeper")
+
+            @prefixed_factories.forEach { |k, v| remove_stale(k, v) }
+          end
+
+          @stale_sweeper.execute
+        end
+
+        def stop_stale_sweeper
+          @stale_sweeper.shutdown
+        end
+      end
+    end
+  end
+end
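A minimal usage sketch of the repository above, assuming hypothetical tags, encoding, directory, and prefix values (in 4.0.0 the actual wiring lives in s3.rb):

    repository = LogStash::Outputs::S3::FileRepository.new(["fb"], "none", "/tmp/logstash-s3")

    # All access to a prefix goes through its PrefixedValue mutex, so
    # concurrent pipeline workers can safely share one repository.
    repository.get_file("logs/2016/") { |file| file.write("an event\n") }

    # Visit every live temporary file, e.g. to apply a rotation policy.
    repository.each_files { |file| puts file.path }

    repository.shutdown # stops the stale-factory sweeper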
data/lib/logstash/outputs/s3/patch.rb
@@ -0,0 +1,22 @@
+# This is a patch related to autoloading and Ruby.
+#
+# The fix exists in JRuby 9k but not in the current JRuby; not sure when or if it will be backported.
+# https://github.com/jruby/jruby/issues/3645
+#
+# AWS is doing tricky name discovery in the module to generate the correct error class, and
+# this strategy is bogus in JRuby; `eager_autoload` doesn't fix the issue.
+#
+# This will be a short-lived patch since AWS is removing the need for it.
+# see: https://github.com/aws/aws-sdk-ruby/issues/1301#issuecomment-261115960
+old_stderr = $stderr
+
+$stderr = StringIO.new
+begin
+  module Aws
+    const_set(:S3, Aws::S3)
+  end
+ensure
+  $stderr = old_stderr
+end
+
+
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module LogStash
|
3
|
+
module Outputs
|
4
|
+
class S3
|
5
|
+
class PathValidator
|
6
|
+
INVALID_CHARACTERS = "\^`><"
|
7
|
+
|
8
|
+
def self.valid?(name)
|
9
|
+
name.match(matches_re).nil?
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.matches_re
|
13
|
+
/[#{Regexp.escape(INVALID_CHARACTERS)}]/
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
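A quick illustration of the validation rule, with hypothetical prefixes: any name containing one of the reserved characters ^ ` > < is rejected.

    LogStash::Outputs::S3::PathValidator.valid?("logs/2016/") # => true
    LogStash::Outputs::S3::PathValidator.valid?("logs>2016")  # => false (">" is reserved)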
data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb
@@ -0,0 +1,24 @@
+# encoding: utf-8
+require "logstash/outputs/s3/size_rotation_policy"
+require "logstash/outputs/s3/time_rotation_policy"
+
+module LogStash
+  module Outputs
+    class S3
+      class SizeAndTimeRotationPolicy
+        def initialize(file_size, time_file)
+          @size_strategy = SizeRotationPolicy.new(file_size)
+          @time_strategy = TimeRotationPolicy.new(time_file)
+        end
+
+        def rotate?(file)
+          @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
+        end
+
+        def needs_periodic?
+          true
+        end
+      end
+    end
+  end
+end
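A minimal sketch of the composed policy, with hypothetical limits: rotation fires as soon as either strategy says so.

    # Rotate at 1 MiB or after 5 minutes, whichever comes first.
    policy = LogStash::Outputs::S3::SizeAndTimeRotationPolicy.new(1024 * 1024, 5)
    policy.rotate?(file) # true once the TemporaryFile `file` reaches 1 MiB,
                         # or is non-empty and older than 5 minutes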
data/lib/logstash/outputs/s3/size_rotation_policy.rb
@@ -0,0 +1,26 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class SizeRotationPolicy
+        attr_reader :size_file
+
+        def initialize(size_file)
+          if size_file <= 0
+            raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
+          end
+
+          @size_file = size_file
+        end
+
+        def rotate?(file)
+          file.size >= size_file
+        end
+
+        def needs_periodic?
+          false
+        end
+      end
+    end
+  end
+end
data/lib/logstash/outputs/s3/temporary_file.rb
@@ -0,0 +1,71 @@
+# encoding: utf-8
+require "thread"
+require "forwardable"
+require "fileutils"
+
+module LogStash
+  module Outputs
+    class S3
+      # Wrap the actual file descriptor into a utility class.
+      # It makes things more OOP and easier to reason about the paths.
+      class TemporaryFile
+        extend Forwardable
+
+        def_delegators :@fd, :path, :write, :close, :fsync
+
+        attr_reader :fd
+
+        def initialize(key, fd, temp_path)
+          @fd = fd
+          @key = key
+          @temp_path = temp_path
+          @created_at = Time.now
+        end
+
+        def ctime
+          @created_at
+        end
+
+        def temp_path
+          @temp_path
+        end
+
+        def size
+          # Use the fd size to get an accurate result,
+          # so we don't have to deal with fsync;
+          # if the file is closed we use File::size instead.
+          begin
+            @fd.size
+          rescue IOError
+            ::File.size(path)
+          end
+        end
+
+        def key
+          @key.gsub(/^\//, "")
+        end
+
+        # Each temporary file is made inside a directory named with a UUID;
+        # instead of deleting the file directly, with the risk of deleting other files,
+        # we delete the root of the UUID. Using a UUID also removes the risk of deleting an unwanted file; it acts as
+        # a sandbox.
+        def delete!
+          @fd.close
+          ::FileUtils.rm_rf(@temp_path, :secure => true)
+        end
+
+        def empty?
+          size == 0
+        end
+
+        def self.create_from_existing_file(file_path, temporary_folder)
+          key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
+
+          TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
+                            ::File.open(file_path, "r"),
+                            ::File.join(temporary_folder, key_parts.slice(0, 1)))
+        end
+      end
+    end
+  end
+end
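A sketch of the crash-recovery entry point, using a hypothetical path: the UUID directory segment is stripped and the remainder becomes the S3 key.

    # <temporary_folder>/<uuid>/<prefix>/<name> maps back to key "<prefix>/<name>"
    file = LogStash::Outputs::S3::TemporaryFile.create_from_existing_file(
      "/tmp/logstash-s3/0f6a22c8/logs/ls.s3.example.part0.txt",
      "/tmp/logstash-s3")
    file.key # => "logs/ls.s3.example.part0.txt"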
data/lib/logstash/outputs/s3/temporary_file_factory.rb
@@ -0,0 +1,123 @@
+# encoding: utf-8
+require "socket"
+require "securerandom"
+require "fileutils"
+require "zlib"
+require "forwardable"
+
+module LogStash
+  module Outputs
+    class S3
+      # Since the file can contain dynamic parts, we have to handle a more local structure to
+      # allow a nice recovery from a crash.
+      #
+      # The local structure will look like this.
+      #
+      # <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%m.tag_es_fb.part1.txt.gz
+      #
+      # Since the UUID should be fairly unique, I can destroy the whole path when an upload is complete.
+      # I do not have to mess around checking whether the other directories have files in them before destroying them.
+      class TemporaryFileFactory
+        FILE_MODE = "a"
+        GZIP_ENCODING = "gzip"
+        GZIP_EXTENSION = "txt.gz"
+        TXT_EXTENSION = "txt"
+        STRFTIME = "%Y-%m-%dT%H.%M"
+
+        attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current
+
+        def initialize(prefix, tags, encoding, temporary_directory)
+          @counter = 0
+          @prefix = prefix
+
+          @tags = tags
+          @encoding = encoding
+          @temporary_directory = temporary_directory
+          @lock = Mutex.new
+
+          rotate!
+        end
+
+        def rotate!
+          @lock.synchronize {
+            @current = new_file
+            increment_counter
+            @current
+          }
+        end
+
+        private
+        def extension
+          gzip? ? GZIP_EXTENSION : TXT_EXTENSION
+        end
+
+        def gzip?
+          encoding == GZIP_ENCODING
+        end
+
+        def increment_counter
+          @counter += 1
+        end
+
+        def current_time
+          Time.now.strftime(STRFTIME)
+        end
+
+        def generate_name
+          filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"
+
+          if tags.size > 0
+            "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
+          else
+            "#{filename}.part#{counter}.#{extension}"
+          end
+        end
+
+        def new_file
+          uuid = SecureRandom.uuid
+          name = generate_name
+          path = ::File.join(temporary_directory, uuid)
+          key = ::File.join(prefix, name)
+
+          FileUtils.mkdir_p(::File.join(path, prefix))
+
+          io = if gzip?
+                 # We have to use this wrapper because we cannot access the size of the
+                 # file directly on the gzip writer.
+                 IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
+               else
+                 ::File.open(::File.join(path, key), FILE_MODE)
+               end
+
+          TemporaryFile.new(key, io, path)
+        end
+
+        class IOWrappedGzip
+          extend Forwardable
+
+          def_delegators :@gzip_writer, :write, :close
+          attr_reader :file_io, :gzip_writer
+
+          def initialize(file_io)
+            @file_io = file_io
+            @gzip_writer = Zlib::GzipWriter.open(file_io)
+          end
+
+          def path
+            @gzip_writer.to_io.path
+          end
+
+          def size
+            # flush first to get the current file size
+            @gzip_writer.flush
+            @gzip_writer.to_io.size
+          end
+
+          def fsync
+            @gzip_writer.to_io.fsync
+          end
+        end
+      end
+    end
+  end
+end
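For orientation, a minimal sketch with hypothetical arguments: every rotate! opens a fresh file under a new UUID directory and bumps the part counter.

    factory = LogStash::Outputs::S3::TemporaryFileFactory.new("logs/", ["fb"], "gzip", "/tmp/logstash-s3")
    factory.current.path   # /tmp/logstash-s3/<uuid>/logs/ls.s3.<uuid>.<time>.tag_fb.part0.txt.gz
    file = factory.rotate! # returns the new current file (part1); the old one is left for upload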
data/lib/logstash/outputs/s3/time_rotation_policy.rb
@@ -0,0 +1,26 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class TimeRotationPolicy
+        attr_reader :time_file
+
+        def initialize(time_file)
+          if time_file <= 0
+            raise LogStash::ConfigurationError, "`time_file` needs to be greater than 0"
+          end
+
+          @time_file = time_file * 60
+        end
+
+        def rotate?(file)
+          file.size > 0 && (Time.now - file.ctime) >= time_file
+        end
+
+        def needs_periodic?
+          true
+        end
+      end
+    end
+  end
+end
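One behavioral note, sketched with hypothetical thresholds: needs_periodic? tells the plugin whether a background timer must re-check files that receive no writes, which only the time-driven policies require.

    LogStash::Outputs::S3::SizeRotationPolicy.new(1024).needs_periodic?            # => false
    LogStash::Outputs::S3::TimeRotationPolicy.new(15).needs_periodic?              # => true
    LogStash::Outputs::S3::SizeAndTimeRotationPolicy.new(1024, 15).needs_periodic? # => true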
data/lib/logstash/outputs/s3/uploader.rb
@@ -0,0 +1,59 @@
+# encoding: utf-8
+require "logstash/util"
+require "aws-sdk"
+
+module LogStash
+  module Outputs
+    class S3
+      class Uploader
+        TIME_BEFORE_RETRYING_SECONDS = 1
+        DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
+                                                                  :min_threads => 1,
+                                                                  :max_threads => 8,
+                                                                  :max_queue => 1,
+                                                                  :fallback_policy => :caller_runs
+                                                                })
+
+
+        attr_reader :bucket, :upload_options, :logger
+
+        def initialize(bucket, logger, threadpool = DEFAULT_THREADPOOL)
+          @bucket = bucket
+          @workers_pool = threadpool
+          @logger = logger
+        end
+
+        def upload_async(file, options = {})
+          @workers_pool.post do
+            LogStash::Util.set_thread_name("S3 output uploader, file: #{file.path}")
+            upload(file, options)
+          end
+        end
+
+        def upload(file, options = {})
+          upload_options = options.fetch(:upload_options, {})
+
+          begin
+            obj = bucket.object(file.key)
+            obj.upload_file(file.path, upload_options)
+          rescue => e
+            # When we get here it usually means that S3 already retried by itself (default is 3 attempts).
+            # When the retry limit is reached or another error happens, we wait and retry.
+            #
+            # The thread might be stuck here, but I think that's better than losing anything;
+            # it's either a transient error or something really bad happened.
+            logger.error("Uploading failed, retrying", :exception => e, :path => file.path, :backtrace => e.backtrace)
+            retry
+          end
+
+          options[:on_complete].call(file) unless options[:on_complete].nil?
+        end
+
+        def stop
+          @workers_pool.shutdown
+          @workers_pool.wait_for_termination(nil) # block until it's done
+        end
+      end
+    end
+  end
+end
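Last, a minimal sketch of driving the uploader, assuming a hypothetical bucket, logger, and options hash; on_complete is where the output plugin deletes the local temporary file once the S3 PUT succeeds.

    bucket = Aws::S3::Bucket.new("my-bucket", :client => Aws::S3::Client.new)
    uploader = LogStash::Outputs::S3::Uploader.new(bucket, logger)

    uploader.upload_async(file,
                          :on_complete => ->(f) { f.delete! },
                          :upload_options => { :server_side_encryption => "AES256" })

    uploader.stop # drains the thread pool, blocking until queued uploads finish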