logstash-output-s3-zst 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +185 -0
- data/CONTRIBUTORS +21 -0
- data/DEVELOPER.md +15 -0
- data/Gemfile +15 -0
- data/LICENSE +202 -0
- data/NOTICE.TXT +5 -0
- data/README.md +98 -0
- data/VERSION +1 -0
- data/docs/index.asciidoc +451 -0
- data/lib/logstash/outputs/s3/file_repository.rb +177 -0
- data/lib/logstash/outputs/s3/patch.rb +22 -0
- data/lib/logstash/outputs/s3/path_validator.rb +18 -0
- data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
- data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/temporary_file.rb +114 -0
- data/lib/logstash/outputs/s3/temporary_file_factory.rb +126 -0
- data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
- data/lib/logstash/outputs/s3/uploader.rb +82 -0
- data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
- data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +60 -0
- data/lib/logstash/outputs/s3-zst.rb +445 -0
- data/lib/tasks/build.rake +15 -0
- data/logstash-output-s3-zst.gemspec +30 -0
- data/spec/outputs/s3_spec.rb +11 -0
- data/spec/spec_helper.rb +6 -0
- metadata +200 -0
data/lib/logstash/outputs/s3/size_rotation_policy.rb
@@ -0,0 +1,26 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class SizeRotationPolicy
+        attr_reader :size_file
+
+        def initialize(size_file)
+          if size_file <= 0
+            raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
+          end
+
+          @size_file = size_file
+        end
+
+        def rotate?(file)
+          file.size >= size_file
+        end
+
+        def needs_periodic?
+          false
+        end
+      end
+    end
+  end
+end
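For context, a minimal sketch (not part of the packaged code) of how a rotation policy is consumed. `StubFile` is a hypothetical stand-in for the `TemporaryFile` class shown later in this diff; a policy only needs the wrapped file to respond to `size` (plus `ctime` for the time-based variant):

    require "logstash/outputs/s3/size_rotation_policy"

    StubFile = Struct.new(:size) # hypothetical double for TemporaryFile
    policy = LogStash::Outputs::S3::SizeRotationPolicy.new(5 * 1024 * 1024) # rotate at 5 MiB

    policy.rotate?(StubFile.new(6 * 1024 * 1024)) # => true, size_file exceeded
    policy.rotate?(StubFile.new(1024))            # => false
    policy.needs_periodic?                        # => false, checked on every write instead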
data/lib/logstash/outputs/s3/temporary_file.rb
@@ -0,0 +1,114 @@
+# encoding: utf-8
+require "thread"
+require "forwardable"
+require "fileutils"
+require "logstash-output-s3_jars"
+
+module LogStash
+  module Outputs
+    class S3
+
+      java_import 'org.logstash.plugins.outputs.s3.GzipUtil'
+
+      # Wraps the actual file descriptor in a utility class
+      # to make it more OOP and easier to reason about the paths.
+      class TemporaryFile
+        extend Forwardable
+
+        GZIP_EXTENSION = "txt.gz"
+        TXT_EXTENSION = "txt"
+        RECOVERED_FILE_NAME_TAG = "-recovered"
+
+        def_delegators :@fd, :path, :write, :close, :fsync
+
+        attr_reader :fd
+
+        def initialize(key, fd, temp_path)
+          @fd = fd
+          @key = key
+          @temp_path = temp_path
+          @created_at = Time.now
+        end
+
+        def ctime
+          @created_at
+        end
+
+        def temp_path
+          @temp_path
+        end
+
+        def size
+          # Use the fd size to get an accurate result,
+          # so we don't have to deal with fsync.
+          # If the file is closed, fd.size raises an IOError, so we fall back to File::size.
+          begin
+            # fd is nil when LS tries to recover a gzip file but fails
+            return 0 if @fd.nil?
+            @fd.size
+          rescue IOError
+            ::File.size(path)
+          end
+        end
+
+        def key
+          @key.gsub(/^\//, "")
+        end
+
+        # Each temporary file is created inside a directory named with a UUID.
+        # Instead of deleting the file directly, and risking deleting other files,
+        # we delete the root of the UUID directory; the UUID also removes the risk
+        # of deleting unwanted files, acting as a sandbox.
+        def delete!
+          @fd.close rescue IOError # force close anyway
+          FileUtils.rm_r(@temp_path, :secure => true)
+        end
+
+        def empty?
+          size == 0
+        end
+
+        # Covers the case where LS cannot restore a corrupted file and the file does not exist.
+        def recoverable?
+          !@fd.nil?
+        end
+
+        def self.create_from_existing_file(file_path, temporary_folder)
+          key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
+
+          # recover the gzip file and compress it back before uploading to S3
+          if file_path.end_with?("." + GZIP_EXTENSION)
+            file_path = self.recover(file_path)
+          end
+          TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
+                            ::File.exist?(file_path) ? ::File.open(file_path, "r") : nil, # for the nil case, file size will be 0 and the upload will be ignored.
+                            ::File.join(temporary_folder, key_parts.slice(0, 1)))
+        end
+
+        def self.gzip_extension
+          GZIP_EXTENSION
+        end
+
+        def self.text_extension
+          TXT_EXTENSION
+        end
+
+        def self.recovery_file_name_tag
+          RECOVERED_FILE_NAME_TAG
+        end
+
+        private
+        def self.recover(file_path)
+          full_gzip_extension = "." + GZIP_EXTENSION
+          recovered_txt_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + "." + TXT_EXTENSION)
+          recovered_gzip_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + full_gzip_extension)
+          GzipUtil.recover(file_path, recovered_txt_file_path)
+          if ::File.exist?(recovered_txt_file_path) && !::File.zero?(recovered_txt_file_path)
+            GzipUtil.compress(recovered_txt_file_path, recovered_gzip_file_path)
+          end
+          recovered_gzip_file_path
+        end
+      end
+    end
+  end
+end
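The class mirrors the on-disk layout the factory (next hunk) creates: `<temporary_directory>/<uuid>/<prefix>/<name>`. A hedged sketch of the lifecycle, with illustrative paths:

    require "tmpdir"
    require "fileutils"
    require "securerandom"

    temp_dir = ::File.join(Dir.tmpdir, "s3-out")  # illustrative temporary_directory
    uuid     = SecureRandom.uuid                  # per-file sandbox directory
    key      = "logs/ls.s3.example.part0.txt"     # illustrative <prefix>/<name> key
    path     = ::File.join(temp_dir, uuid)
    FileUtils.mkdir_p(::File.dirname(::File.join(path, key)))

    fd   = ::File.open(::File.join(path, key), "a")
    file = LogStash::Outputs::S3::TemporaryFile.new(key, fd, path)
    file.write("hello\n")
    file.fsync
    file.size    # fd.size while open; falls back to File::size after close
    file.delete! # removes the whole <uuid> sandbox, not just the file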
data/lib/logstash/outputs/s3/temporary_file_factory.rb
@@ -0,0 +1,126 @@
+# encoding: utf-8
+require "socket"
+require "securerandom"
+require "fileutils"
+require "zlib"
+require "forwardable"
+
+module LogStash
+  module Outputs
+    class S3
+      # Since the file can contain a dynamic part, we have to handle a more local structure to
+      # allow a nice recovery from a crash.
+      #
+      # The local structure will look like this:
+      #
+      # <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%M.tag_es_fb.part1.txt.gz
+      #
+      # Since the UUID should be fairly unique, I can destroy the whole path when an upload is complete.
+      # I do not have to check whether the other directories have files in them before destroying them.
+      class TemporaryFileFactory
+        FILE_MODE = "a"
+        STRFTIME = "%Y-%m-%dT%H.%M"
+
+        attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current
+
+        def initialize(prefix, tags, encoding, temporary_directory)
+          @counter = 0
+          @prefix = prefix
+
+          @tags = tags
+          @encoding = encoding
+          @temporary_directory = temporary_directory
+          @lock = Mutex.new
+
+          rotate!
+        end
+
+        def rotate!
+          @lock.synchronize {
+            @current = new_file
+            increment_counter
+            @current
+          }
+        end
+
+        private
+        def extension
+          gzip? ? TemporaryFile.gzip_extension : TemporaryFile.text_extension
+        end
+
+        def gzip?
+          encoding == GZIP_ENCODING
+        end
+
+        def increment_counter
+          @counter += 1
+        end
+
+        def current_time
+          Time.now.strftime(STRFTIME)
+        end
+
+        def generate_name
+          filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"
+
+          if tags.size > 0
+            "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
+          else
+            "#{filename}.part#{counter}.#{extension}"
+          end
+        end
+
+        def new_file
+          uuid = SecureRandom.uuid
+          name = generate_name
+          path = ::File.join(temporary_directory, uuid)
+          key = ::File.join(prefix, name)
+
+          FileUtils.mkdir_p(::File.join(path, prefix))
+
+          io = if gzip?
+                 # We have to use this wrapper because we cannot access the size of the
+                 # file directly on the gzip writer.
+                 IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
+               else
+                 ::File.open(::File.join(path, key), FILE_MODE)
+               end
+
+          TemporaryFile.new(key, io, path)
+        end
+
+        class IOWrappedGzip
+          extend Forwardable
+
+          def_delegators :@gzip_writer, :write, :close
+          attr_reader :file_io, :gzip_writer
+
+          def initialize(file_io)
+            @file_io = file_io
+            @gzip_writer = Zlib::GzipWriter.new(file_io)
+          end
+
+          def path
+            @gzip_writer.to_io.path
+          end
+
+          def size
+            # to get the current file size
+            if @gzip_writer.pos == 0
+              # Ensure a zero file size is returned when nothing has
+              # yet been written to the gzip file.
+              0
+            else
+              @gzip_writer.flush
+              @gzip_writer.to_io.size
+            end
+          end
+
+          def fsync
+            @gzip_writer.to_io.fsync
+          end
+        end
+      end
+    end
+  end
+end
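A hedged sketch of the factory in use; it assumes `GZIP_ENCODING` (defined in the main plugin file, `s3-zst.rb`, not in this hunk) is `"gzip"`:

    factory = LogStash::Outputs::S3::TemporaryFileFactory.new(
      "logs",        # prefix; may be event-dependent in the real plugin
      ["es", "fb"],  # tags baked into the file name
      "gzip",        # encoding; selects the IOWrappedGzip wrapper
      "/tmp/s3-out"  # temporary_directory
    )

    file = factory.current # ls.s3.<uuid>.<time>.tag_es.fb.part0.txt.gz in a fresh <uuid> dir
    file.write("event\n")
    factory.rotate!        # swaps in a brand-new part1 file under a new <uuid> dir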
data/lib/logstash/outputs/s3/time_rotation_policy.rb
@@ -0,0 +1,26 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class TimeRotationPolicy
+        attr_reader :time_file
+
+        def initialize(time_file)
+          if time_file <= 0
+            raise LogStash::ConfigurationError, "`time_file` needs to be greater than 0"
+          end
+
+          @time_file = time_file * 60
+        end
+
+        def rotate?(file)
+          file.size > 0 && (Time.now - file.ctime) >= time_file
+        end
+
+        def needs_periodic?
+          true
+        end
+      end
+    end
+  end
+end
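Both policies expose the same two-method interface (`rotate?`/`needs_periodic?`), which is what lets `size_and_time_rotation_policy.rb` (listed in the file summary above but not shown in this diff) combine them. An illustrative composite, with names assumed from that file list:

    # Hypothetical sketch modeled on size_and_time_rotation_policy.rb (not shown here).
    class SizeAndTimeRotationPolicy
      def initialize(size_file, time_file)
        @size_policy = LogStash::Outputs::S3::SizeRotationPolicy.new(size_file)
        @time_policy = LogStash::Outputs::S3::TimeRotationPolicy.new(time_file)
      end

      def rotate?(file)
        @size_policy.rotate?(file) || @time_policy.rotate?(file)
      end

      def needs_periodic?
        true # the time condition must be re-checked even when no events arrive
      end
    end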
data/lib/logstash/outputs/s3/uploader.rb
@@ -0,0 +1,82 @@
+# encoding: utf-8
+require "logstash/util"
+require "aws-sdk"
+require 'securerandom'
+require 'zstd'
+
+module LogStash
+  module Outputs
+    class S3
+      class Uploader
+
+        DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
+          :min_threads => 1,
+          :max_threads => 8,
+          :max_queue => 1,
+          :fallback_policy => :caller_runs
+        })
+
+        attr_reader :bucket, :upload_options, :logger
+
+        def initialize(bucket, logger, threadpool = DEFAULT_THREADPOOL, retry_count: Float::INFINITY, retry_delay: 1)
+          @bucket = bucket
+          @workers_pool = threadpool
+          @logger = logger
+          @retry_count = retry_count
+          @retry_delay = retry_delay
+        end
+
+        def upload_async(file, options = {})
+          @workers_pool.post do
+            LogStash::Util.set_thread_name("S3 output uploader, file: #{file.path}")
+            upload(file, options)
+          end
+        end
+
+        # Compresses a TemporaryFile with zstd, then uploads it to S3.
+        def upload(file, options = {})
+          upload_options = options.fetch(:upload_options, {})
+
+          zstd_compressed_file = "#{SecureRandom.uuid}.json.zst"
+          # Assumes the zstd library's compress_file returns a handle responding to #key and #path.
+          compressed = Zstd.compress_file(file.path, zstd_compressed_file)
+
+          tries = 0
+          begin
+            obj = bucket.object(compressed.key)
+            obj.upload_file(compressed.path, upload_options)
+          rescue Errno::ENOENT => e
+            logger.error("File doesn't exist! Unrecoverable error.", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+          rescue => e
+            # When we get here it usually means that the S3 client already retried on its own (the default is 3 attempts).
+            # When that retry limit is reached, or another error happens, we wait and retry ourselves.
+            #
+            # The thread might be stuck here, but that is better than losing anything:
+            # it is either a transient error or something really bad happened.
+            if tries < @retry_count
+              tries += 1
+              logger.warn("Uploading failed, retrying (##{tries} of #{@retry_count})", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+              sleep @retry_delay
+              retry
+            else
+              logger.error("Failed to upload file (retried #{@retry_count} times).", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+            end
+          end
+
+          begin
+            options[:on_complete].call(compressed) unless options[:on_complete].nil?
+          rescue => e
+            logger.error("An error occurred in the `on_complete` uploader", :exception => e.class, :message => e.message, :path => compressed.path, :backtrace => e.backtrace)
+            raise e # reraise it since we don't deal with it now
+          end
+        end
+
+        def stop
+          @workers_pool.shutdown
+          @workers_pool.wait_for_termination(nil) # block until it's done
+        end
+
+      end
+    end
+  end
+end
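A hedged sketch of wiring the uploader, assuming an `Aws::S3::Bucket` resource and a logger are already in scope; note that in this fork the `on_complete` callback receives the compressed file handle rather than the original `TemporaryFile`:

    # Illustrative values; bucket name, region, and upload options are assumptions.
    bucket   = Aws::S3::Resource.new(region: "us-east-1").bucket("my-bucket")
    uploader = LogStash::Outputs::S3::Uploader.new(bucket, logger, retry_count: 5, retry_delay: 2)

    uploader.upload_async(temp_file,
      :on_complete    => ->(compressed) { temp_file.delete! },
      :upload_options => { :server_side_encryption => "AES256" })

    uploader.stop # drain the worker pool on plugin close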
data/lib/logstash/outputs/s3/writable_directory_validator.rb
@@ -0,0 +1,17 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class WritableDirectoryValidator
+        def self.valid?(path)
+          begin
+            FileUtils.mkdir_p(path) unless Dir.exist?(path)
+            ::File.writable?(path)
+          rescue
+            false
+          end
+        end
+      end
+    end
+  end
+end
data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb
@@ -0,0 +1,60 @@
+# encoding: utf-8
+require "stud/temporary"
+require "socket"
+require "fileutils"
+
+module LogStash
+  module Outputs
+    class S3
+      class WriteBucketPermissionValidator
+        attr_reader :logger
+
+        def initialize(logger)
+          @logger = logger
+        end
+
+        def valid?(bucket_resource, upload_options = {})
+          begin
+            upload_test_file(bucket_resource, upload_options)
+            true
+          rescue StandardError => e
+            logger.error("Error validating bucket write permissions!",
+              :message => e.message,
+              :class => e.class.name,
+              :backtrace => e.backtrace
+            )
+            false
+          end
+        end
+
+        private
+        def upload_test_file(bucket_resource, upload_options = {})
+          generated_at = Time.now
+
+          key = "logstash-programmatic-access-test-object-#{generated_at}"
+          content = "Logstash permission check on #{generated_at}, by #{Socket.gethostname}"
+
+          begin
+            f = Stud::Temporary.file
+            f.write(content)
+            f.fsync
+
+            obj = bucket_resource.object(key)
+            obj.upload_file(f, upload_options)
+
+            begin
+              obj.delete
+            rescue
+              # Try to remove the file from the remote bucket,
+              # but don't raise any errors if that doesn't work,
+              # since we only really need `PutObject`.
+            end
+          ensure
+            f.close
+            FileUtils.rm_rf(f.path)
+          end
+        end
+      end
+    end
+  end
+end
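A hedged sketch of how the plugin would use this validator at register time, failing fast when the credentials lack `s3:PutObject` on the configured bucket:

    validator = LogStash::Outputs::S3::WriteBucketPermissionValidator.new(logger)
    unless validator.valid?(bucket_resource, upload_options)
      raise LogStash::ConfigurationError, "Logstash must have permissions to write to the S3 bucket"
    end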