logstash-output-s3-zst 1.0.0

@@ -0,0 +1,26 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class SizeRotationPolicy
+        attr_reader :size_file
+
+        def initialize(size_file)
+          if size_file <= 0
+            raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
+          end
+
+          @size_file = size_file
+        end
+
+        def rotate?(file)
+          file.size >= size_file
+        end
+
+        def needs_periodic?
+          false
+        end
+      end
+    end
+  end
+end
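
Taken on its own, `SizeRotationPolicy` is a small value object: `rotate?` is a pure predicate over anything that responds to `size`. A minimal sketch of how the plugin exercises it (`FakeFile` is a hypothetical stand-in for the `TemporaryFile` class defined later in this gem):

    # FakeFile is a hypothetical stand-in for TemporaryFile.
    FakeFile = Struct.new(:size)

    policy = LogStash::Outputs::S3::SizeRotationPolicy.new(1024 * 1024)  # bytes
    policy.rotate?(FakeFile.new(512))              # => false
    policy.rotate?(FakeFile.new(2 * 1024 * 1024))  # => true
    policy.needs_periodic?                         # => false, checked on every write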
@@ -0,0 +1,115 @@
+# encoding: utf-8
+require "thread"
+require "forwardable"
+require "fileutils"
+require "pathname"
+require "logstash-output-s3_jars"
+
+module LogStash
+  module Outputs
+    class S3
+
+      java_import 'org.logstash.plugins.outputs.s3.GzipUtil'
+
+      # Wraps the actual file descriptor in a utility class
+      # to make it more OOP and easier to reason about paths.
+      class TemporaryFile
+        extend Forwardable
+
+        GZIP_EXTENSION = "txt.gz"
+        TXT_EXTENSION = "txt"
+        RECOVERED_FILE_NAME_TAG = "-recovered"
+
+        def_delegators :@fd, :path, :write, :close, :fsync
+
+        attr_reader :fd
+
+        def initialize(key, fd, temp_path)
+          @fd = fd
+          @key = key
+          @temp_path = temp_path
+          @created_at = Time.now
+        end
+
+        def ctime
+          @created_at
+        end
+
+        def temp_path
+          @temp_path
+        end
+
+        def size
+          # Use the fd size to get an accurate result,
+          # so we don't have to deal with fsync;
+          # if the file is closed, fd.size raises an IOError, so we fall back to File::size
+          begin
+            # fd is nil when LS tried to recover a gzip file but failed
+            return 0 if @fd.nil?
+            @fd.size
+          rescue IOError
+            ::File.size(path)
+          end
+        end
+
+        def key
+          @key.gsub(/^\//, "")
+        end
+
+        # Each temporary file is created inside a directory named with a UUID;
+        # instead of deleting the file directly, and risking deleting other files,
+        # we delete the root of the UUID. Using a UUID also removes the risk of
+        # deleting an unwanted file; it acts as a sandbox.
+        def delete!
+          @fd.close rescue IOError # force close anyway
+          FileUtils.rm_r(@temp_path, :secure => true)
+        end
+
+        def empty?
+          size == 0
+        end
+
+        # covers the case where LS cannot restore a corrupted file and the file does not exist
+        def recoverable?
+          !@fd.nil?
+        end
+
+        def self.create_from_existing_file(file_path, temporary_folder)
+          key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
+
+          # recover the gzip file and compress it back before uploading to S3
+          if file_path.end_with?("." + GZIP_EXTENSION)
+            file_path = self.recover(file_path)
+          end
+          TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
+                            ::File.exist?(file_path) ? ::File.open(file_path, "r") : nil, # for the nil case, file size will be 0 and the upload will be skipped.
+                            ::File.join(temporary_folder, key_parts.slice(0, 1)))
+        end
+
+        def self.gzip_extension
+          GZIP_EXTENSION
+        end
+
+        def self.text_extension
+          TXT_EXTENSION
+        end
+
+        def self.recovery_file_name_tag
+          RECOVERED_FILE_NAME_TAG
+        end
+
+        private
+        def self.recover(file_path)
+          full_gzip_extension = "." + GZIP_EXTENSION
+          recovered_txt_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + "." + TXT_EXTENSION)
+          recovered_gzip_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + full_gzip_extension)
+          GzipUtil.recover(file_path, recovered_txt_file_path)
+          if ::File.exist?(recovered_txt_file_path) && !::File.zero?(recovered_txt_file_path)
+            GzipUtil.compress(recovered_txt_file_path, recovered_gzip_file_path)
+          end
+          recovered_gzip_file_path
+        end
+      end
+    end
+  end
+end
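
On startup the plugin can recover files that a crashed Logstash left behind; `create_from_existing_file` rebuilds the S3 key from the path relative to the temporary folder, re-inflating and re-compressing gzip files along the way. A hedged sketch of that flow (all paths are illustrative):

    # Illustrative paths; the first directory level under the temporary
    # folder is the per-upload UUID sandbox.
    temporary_folder = "/tmp/logstash"
    stale = "/tmp/logstash/<uuid>/my-prefix/ls.s3.example.2024-01-01T00.00.part0.txt.gz"

    file = LogStash::Outputs::S3::TemporaryFile.create_from_existing_file(stale, temporary_folder)
    file.key           # => "my-prefix/ls.s3.example.2024-01-01T00.00.part0.txt.gz"
    file.recoverable?  # => false when recovery failed and no fd could be opened
    file.delete!       # removes the whole UUID sandbox once the upload is done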
@@ -0,0 +1,126 @@
+# encoding: utf-8
+require "socket"
+require "securerandom"
+require "fileutils"
+require "zlib"
+require "forwardable"
+
+module LogStash
+  module Outputs
+    class S3
+      # Since the file name can contain dynamic parts, we have to keep a local
+      # directory structure that allows a clean recovery from a crash.
+      #
+      # The local structure looks like this:
+      #
+      # <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%m.tag_es_fb.part1.txt.gz
+      #
+      # Since the UUID is unique, we can destroy the whole path when an upload is complete,
+      # without checking whether the other directories still contain files.
+      class TemporaryFileFactory
+        FILE_MODE = "a"
+        STRFTIME = "%Y-%m-%dT%H.%M"
+
+        attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current
+
+        def initialize(prefix, tags, encoding, temporary_directory)
+          @counter = 0
+          @prefix = prefix
+
+          @tags = tags
+          @encoding = encoding
+          @temporary_directory = temporary_directory
+          @lock = Mutex.new
+
+          rotate!
+        end
+
+        def rotate!
+          @lock.synchronize {
+            @current = new_file
+            increment_counter
+            @current
+          }
+        end
+
+        private
+        def extension
+          gzip? ? TemporaryFile.gzip_extension : TemporaryFile.text_extension
+        end
+
+        def gzip?
+          encoding == GZIP_ENCODING
+        end
+
+        def increment_counter
+          @counter += 1
+        end
+
+        def current_time
+          Time.now.strftime(STRFTIME)
+        end
+
+        def generate_name
+          filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"
+
+          if tags.size > 0
+            "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
+          else
+            "#{filename}.part#{counter}.#{extension}"
+          end
+        end
+
+        def new_file
+          uuid = SecureRandom.uuid
+          name = generate_name
+          path = ::File.join(temporary_directory, uuid)
+          key = ::File.join(prefix, name)
+
+          FileUtils.mkdir_p(::File.join(path, prefix))
+
+          io = if gzip?
+                 # We have to use this wrapper because we cannot get the size of the
+                 # file directly from the gzip writer.
+                 IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
+               else
+                 ::File.open(::File.join(path, key), FILE_MODE)
+               end
+
+          TemporaryFile.new(key, io, path)
+        end
+
+        class IOWrappedGzip
+          extend Forwardable
+
+          def_delegators :@gzip_writer, :write, :close
+          attr_reader :file_io, :gzip_writer
+
+          def initialize(file_io)
+            @file_io = file_io
+            @gzip_writer = Zlib::GzipWriter.new(file_io)
+          end
+
+          def path
+            @gzip_writer.to_io.path
+          end
+
+          def size
+            # get the current file size
+            if @gzip_writer.pos == 0
+              # Ensure a zero file size is returned when nothing has
+              # yet been written to the gzip file.
+              0
+            else
+              @gzip_writer.flush
+              @gzip_writer.to_io.size
+            end
+          end
+
+          def fsync
+            @gzip_writer.to_io.fsync
+          end
+        end
+      end
+    end
+  end
+end
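
A sketch of the factory's life cycle: every `rotate!` opens a fresh file under a brand-new UUID directory, so deleting one upload's sandbox can never touch another's (all arguments are illustrative; `GZIP_ENCODING` is the `"gzip"` constant from the parent plugin class):

    factory = LogStash::Outputs::S3::TemporaryFileFactory.new(
      "my-prefix",       # prefix: first path segment of the S3 key
      ["es", "fb"],      # tags: baked into the generated file name
      "gzip",            # encoding: anything else produces plain .txt files
      "/tmp/logstash"    # temporary_directory
    )

    factory.current.write("log line\n")  # TemporaryFile backed by IOWrappedGzip
    factory.rotate!                      # new UUID dir; the new file's name uses part1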
@@ -0,0 +1,26 @@
+# encoding: utf-8
+module LogStash
+  module Outputs
+    class S3
+      class TimeRotationPolicy
+        attr_reader :time_file
+
+        def initialize(time_file)
+          if time_file <= 0
+            raise LogStash::ConfigurationError, "`time_file` needs to be greater than 0"
+          end
+
+          @time_file = time_file * 60
+        end
+
+        def rotate?(file)
+          file.size > 0 && (Time.now - file.ctime) >= time_file
+        end
+
+        def needs_periodic?
+          true
+        end
+      end
+    end
+  end
+end
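
Unlike the size policy, this one answers `true` to `needs_periodic?`: a file that stops receiving writes would otherwise never be checked again. A small sketch (again with a hypothetical stand-in file):

    # FakeFile is hypothetical; ctime is the creation Time, as in TemporaryFile.
    FakeFile = Struct.new(:size, :ctime)

    policy = LogStash::Outputs::S3::TimeRotationPolicy.new(15)  # minutes
    policy.rotate?(FakeFile.new(100, Time.now))                 # => false, too young
    policy.rotate?(FakeFile.new(100, Time.now - 16 * 60))       # => true
    policy.rotate?(FakeFile.new(0, Time.now - 16 * 60))         # => false, empty files never rotate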
@@ -0,0 +1,91 @@
+# encoding: utf-8
+require "logstash/util"
+require "aws-sdk"
+require 'securerandom'
+require 'zstd'
+
+module LogStash
+  module Outputs
+    class S3
+      class Uploader
+
+        DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
+          :min_threads => 1,
+          :max_threads => 8,
+          :max_queue => 1,
+          :fallback_policy => :caller_runs
+        })
+
+        # Mirrors the part of TemporaryFile's interface (`key`/`path`) that
+        # the upload and the `on_complete` callback below need for the
+        # zstd-compressed file.
+        CompressedFile = Struct.new(:key, :path)
+
+        attr_reader :bucket, :upload_options, :logger
+
+        def initialize(bucket, logger, threadpool = DEFAULT_THREADPOOL, retry_count: Float::INFINITY, retry_delay: 1)
+          @bucket = bucket
+          @workers_pool = threadpool
+          @logger = logger
+          @retry_count = retry_count
+          @retry_delay = retry_delay
+        end
+
+        def upload_async(file, options = {})
+          @workers_pool.post do
+            LogStash::Util.set_thread_name("S3 output uploader, file: #{file.path}")
+            upload(file, options)
+          end
+        end
+
+        # compresses a TemporaryFile with zstd and uploads the result to S3
+        def upload(file, options = {})
+          upload_options = options.fetch(:upload_options, {})
+
+          # Compress next to the original temporary file; the compressed file
+          # name becomes the S3 object key.
+          compressed_name = "#{SecureRandom.uuid}.json.zst"
+          compressed_path = ::File.join(::File.dirname(file.path), compressed_name)
+          Zstd.compress_file(file.path, compressed_path)
+          compressed = CompressedFile.new(compressed_name, compressed_path)
+
+          tries = 0
+          begin
+            obj = bucket.object(compressed.key)
+            obj.upload_file(compressed.path, upload_options)
+          rescue Errno::ENOENT => e
+            logger.error("File doesn't exist! Unrecoverable error.", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+          rescue => e
+            # Getting here usually means the AWS SDK already retried on its own
+            # (the default is 3 attempts). When its retry limit is reached, or
+            # another error happens, we wait and retry ourselves.
+            #
+            # The thread might be stuck here, but that is better than losing data:
+            # either the error is transient or something really bad happened.
+            if tries < @retry_count
+              tries += 1
+              logger.warn("Uploading failed, retrying (##{tries} of #{@retry_count})", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+              sleep @retry_delay
+              retry
+            else
+              logger.error("Failed to upload file (retried #{@retry_count} times).", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+            end
+          end
+
+          begin
+            options[:on_complete].call(compressed) unless options[:on_complete].nil?
+          rescue => e
+            logger.error("An error occurred in the `on_complete` callback", :exception => e.class, :message => e.message, :path => compressed.path, :backtrace => e.backtrace)
+            raise e # reraise it since we don't deal with it here
+          end
+        end
+
+        def stop
+          @workers_pool.shutdown
+          @workers_pool.wait_for_termination(nil) # block until it's done
+        end
+
+      end
+    end
+  end
+end
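
A hedged usage sketch: the uploader expects an `Aws::S3::Bucket` resource, and `on_complete` receives the compressed artifact so the caller can clean it up (the bucket name, region, `logger`, and `temporary_file` are illustrative):

    require "aws-sdk"

    bucket = Aws::S3::Resource.new(region: "us-east-1").bucket("my-bucket")
    uploader = LogStash::Outputs::S3::Uploader.new(bucket, logger, retry_count: 5, retry_delay: 2)

    uploader.upload_async(
      temporary_file,  # a TemporaryFile produced by the factory above
      :upload_options => { :server_side_encryption => "AES256" },
      :on_complete => ->(compressed) { ::File.delete(compressed.path) }
    )
    uploader.stop      # drain the thread pool on plugin shutdown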
@@ -0,0 +1,18 @@
+# encoding: utf-8
+require "fileutils"
+
+module LogStash
+  module Outputs
+    class S3
+      class WritableDirectoryValidator
+        def self.valid?(path)
+          begin
+            FileUtils.mkdir_p(path) unless Dir.exist?(path)
+            ::File.writable?(path)
+          rescue
+            false
+          end
+        end
+      end
+    end
+  end
+end
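
Used at register time to fail fast on a misconfigured `temporary_directory`; a one-line guard is all the plugin needs:

    unless LogStash::Outputs::S3::WritableDirectoryValidator.valid?("/tmp/logstash")
      raise LogStash::ConfigurationError, "Logstash must have write permissions on the temporary directory"
    end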
@@ -0,0 +1,60 @@
+# encoding: utf-8
+require "stud/temporary"
+require "socket"
+require "fileutils"
+
+module LogStash
+  module Outputs
+    class S3
+      class WriteBucketPermissionValidator
+        attr_reader :logger
+
+        def initialize(logger)
+          @logger = logger
+        end
+
+        def valid?(bucket_resource, upload_options = {})
+          begin
+            upload_test_file(bucket_resource, upload_options)
+            true
+          rescue StandardError => e
+            logger.error("Error validating bucket write permissions!",
+              :message => e.message,
+              :class => e.class.name,
+              :backtrace => e.backtrace
+            )
+            false
+          end
+        end
+
+        private
+        def upload_test_file(bucket_resource, upload_options = {})
+          generated_at = Time.now
+
+          key = "logstash-programmatic-access-test-object-#{generated_at}"
+          content = "Logstash permission check on #{generated_at}, by #{Socket.gethostname}"
+
+          begin
+            f = Stud::Temporary.file
+            f.write(content)
+            f.fsync
+
+            obj = bucket_resource.object(key)
+            obj.upload_file(f, upload_options)
+
+            begin
+              obj.delete
+            rescue
+              # Try to remove the file from the remote bucket,
+              # but don't raise an error if that fails,
+              # since we only really need `PutObject` permission.
+            end
+          ensure
+            f.close
+            FileUtils.rm_rf(f.path)
+          end
+        end
+      end
+    end
+  end
+end
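
The same fail-fast idea applied to the bucket itself: upload a probe object and delete it on a best-effort basis before accepting the configuration. A sketch, reusing the illustrative `bucket` resource and `logger` from the uploader example:

    validator = LogStash::Outputs::S3::WriteBucketPermissionValidator.new(logger)
    unless validator.valid?(bucket, { :server_side_encryption => "AES256" })
      raise LogStash::ConfigurationError, "Logstash must have PutObject permissions on the bucket"
    end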