logstash-output-s3-zst 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class SizeRotationPolicy
+         attr_reader :size_file
+
+         def initialize(size_file)
+           if size_file <= 0
+             raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
+           end
+
+           @size_file = size_file
+         end
+
+         def rotate?(file)
+           file.size >= size_file
+         end
+
+         def needs_periodic?
+           false
+         end
+       end
+     end
+   end
+ end
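
The rotation policies implement the same duck-typed contract: rotate?(file) and needs_periodic?. A minimal sketch of how a caller might exercise the size policy, using an OpenStruct as an illustrative stand-in for a TemporaryFile:

    require "ostruct"

    policy = LogStash::Outputs::S3::SizeRotationPolicy.new(1024 * 1024) # rotate at 1 MiB
    file   = OpenStruct.new(:size => 2 * 1024 * 1024)                   # stand-in for a TemporaryFile

    policy.rotate?(file)    # => true, the file reached size_file
    policy.needs_periodic?  # => false, size is re-checked on every write anyway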
@@ -0,0 +1,115 @@
+ # encoding: utf-8
+ require "thread"
+ require "forwardable"
+ require "fileutils"
+ require "pathname"
+ require "logstash-output-s3_jars"
+
+ module LogStash
+   module Outputs
+     class S3
+
+       java_import 'org.logstash.plugins.outputs.s3.GzipUtil'
+
+       # Wraps the actual file descriptor in a utility class,
+       # making it more OOP and easier to reason about the paths.
+       class TemporaryFile
+         extend Forwardable
+
+         GZIP_EXTENSION = "txt.gz"
+         TXT_EXTENSION = "txt"
+         RECOVERED_FILE_NAME_TAG = "-recovered"
+
+         def_delegators :@fd, :path, :write, :close, :fsync
+
+         attr_reader :fd
+
+         def initialize(key, fd, temp_path)
+           @fd = fd
+           @key = key
+           @temp_path = temp_path
+           @created_at = Time.now
+         end
+
+         def ctime
+           @created_at
+         end
+
+         def temp_path
+           @temp_path
+         end
+
+         def size
+           # Use the fd size to get an accurate result,
+           # so we don't have to deal with fsync.
+           # If the file is closed, fd.size raises an IOError, so we fall back to File::size.
+           begin
+             # fd is nil when LS tries to recover a gzip file but fails
+             return 0 if @fd.nil?
+             @fd.size
+           rescue IOError
+             ::File.size(path)
+           end
+         end
+
+         def key
+           @key.gsub(/^\//, "")
+         end
+
+         # Each temporary file is created inside a directory named with a UUID.
+         # Instead of deleting the file directly, and risking the deletion of other files,
+         # we delete the root of the UUID. Using a UUID also removes the risk of deleting
+         # unwanted files; it acts as a sandbox.
+         def delete!
+           @fd.close rescue nil # force close, ignoring any IO errors
+           FileUtils.rm_r(@temp_path, :secure => true)
+         end
+
+         def empty?
+           size == 0
+         end
+
+         # Only to cover the case where LS cannot restore a corrupted file and the file does not exist.
+         def recoverable?
+           !@fd.nil?
+         end
+
+         def self.create_from_existing_file(file_path, temporary_folder)
+           key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
+
+           # recover the gzip file and compress it back before uploading to S3
+           if file_path.end_with?("." + GZIP_EXTENSION)
+             file_path = self.recover(file_path)
+           end
+           TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
+                             ::File.exist?(file_path) ? ::File.open(file_path, "r") : nil, # for the nil case, file size will be 0 and the upload will be skipped.
+                             ::File.join(temporary_folder, key_parts.slice(0, 1)))
+         end
+
+         def self.gzip_extension
+           GZIP_EXTENSION
+         end
+
+         def self.text_extension
+           TXT_EXTENSION
+         end
+
+         def self.recovery_file_name_tag
+           RECOVERED_FILE_NAME_TAG
+         end
+
+         private
+         def self.recover(file_path)
+           full_gzip_extension = "." + GZIP_EXTENSION
+           recovered_txt_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + "." + TXT_EXTENSION)
+           recovered_gzip_file_path = file_path.gsub(full_gzip_extension, RECOVERED_FILE_NAME_TAG + full_gzip_extension)
+           GzipUtil.recover(file_path, recovered_txt_file_path)
+           if ::File.exist?(recovered_txt_file_path) && !::File.zero?(recovered_txt_file_path)
+             GzipUtil.compress(recovered_txt_file_path, recovered_gzip_file_path)
+           end
+           recovered_gzip_file_path
+         end
+       end
+     end
+   end
+ end
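
A hedged sketch of the recovery path this class exists for: on restart, the plugin can rebuild TemporaryFile instances from files left in the temporary directory (the directory and file names below are illustrative):

    temp_dir = "/tmp/logstash"
    # e.g. a leftover file: /tmp/logstash/<uuid>/logs/ls.s3.host.2024-01-01T00.00.part0.txt.gz
    leftover = Dir.glob(::File.join(temp_dir, "**", "*.txt.gz")).first

    file = LogStash::Outputs::S3::TemporaryFile.create_from_existing_file(leftover, temp_dir)
    file.recoverable?  # false when the gzip recovery failed and no fd could be opened
    file.empty?        # true when nothing was recovered; such files can be skipped
    file.delete!       # removes the whole <uuid> sandbox directory, not just the file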
@@ -0,0 +1,126 @@
+ # encoding: utf-8
+ require "socket"
+ require "securerandom"
+ require "fileutils"
+ require "zlib"
+ require "forwardable"
+
+ module LogStash
+   module Outputs
+     class S3
+       # Since the file name can contain dynamic parts, we have to keep a more local structure
+       # to allow a clean recovery from a crash.
+       #
+       # The local structure will look like this:
+       #
+       # <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%m.tag_es_fb.part1.txt.gz
+       #
+       # Since the UUID should be unique, we can destroy the whole path when an upload completes,
+       # without having to check whether the other directories contain files before destroying them.
+       class TemporaryFileFactory
+         FILE_MODE = "a"
+         STRFTIME = "%Y-%m-%dT%H.%M"
+
+         attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current
+
+         def initialize(prefix, tags, encoding, temporary_directory)
+           @counter = 0
+           @prefix = prefix
+
+           @tags = tags
+           @encoding = encoding
+           @temporary_directory = temporary_directory
+           @lock = Mutex.new
+
+           rotate!
+         end
+
+         def rotate!
+           @lock.synchronize {
+             @current = new_file
+             increment_counter
+             @current
+           }
+         end
+
+         private
+         def extension
+           gzip? ? TemporaryFile.gzip_extension : TemporaryFile.text_extension
+         end
+
+         def gzip?
+           encoding == GZIP_ENCODING
+         end
+
+         def increment_counter
+           @counter += 1
+         end
+
+         def current_time
+           Time.now.strftime(STRFTIME)
+         end
+
+         def generate_name
+           filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"
+
+           if tags.size > 0
+             "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
+           else
+             "#{filename}.part#{counter}.#{extension}"
+           end
+         end
+
+         def new_file
+           uuid = SecureRandom.uuid
+           name = generate_name
+           path = ::File.join(temporary_directory, uuid)
+           key = ::File.join(prefix, name)
+
+           FileUtils.mkdir_p(::File.join(path, prefix))
+
+           io = if gzip?
+             # We have to use this wrapper because we cannot access the size of the
+             # file directly on the gzip writer.
+             IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
+           else
+             ::File.open(::File.join(path, key), FILE_MODE)
+           end
+
+           TemporaryFile.new(key, io, path)
+         end
+
+         class IOWrappedGzip
+           extend Forwardable
+
+           def_delegators :@gzip_writer, :write, :close
+           attr_reader :file_io, :gzip_writer
+
+           def initialize(file_io)
+             @file_io = file_io
+             @gzip_writer = Zlib::GzipWriter.new(file_io)
+           end
+
+           def path
+             @gzip_writer.to_io.path
+           end
+
+           def size
+             # to get the current file size
+             if @gzip_writer.pos == 0
+               # Ensure a zero file size is returned when nothing has
+               # yet been written to the gzip file.
+               0
+             else
+               @gzip_writer.flush
+               @gzip_writer.to_io.size
+             end
+           end
+
+           def fsync
+             @gzip_writer.to_io.fsync
+           end
+         end
+       end
+     end
+   end
+ end
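
A sketch of how the factory is typically driven. The arguments are illustrative, and GZIP_ENCODING is assumed to be the plugin-wide constant ("gzip") defined elsewhere in the codebase:

    factory = LogStash::Outputs::S3::TemporaryFileFactory.new(
      "logs",            # prefix, becomes part of the S3 key
      ["es", "fb"],      # tags, embedded in the generated file name
      "gzip",            # encoding, selects the IOWrappedGzip writer
      "/tmp/logstash"    # temporary_directory
    )

    factory.current.write("hello\n")  # write to part0
    rotated = factory.rotate!         # fresh TemporaryFile, part counter incremented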
@@ -0,0 +1,26 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class TimeRotationPolicy
+         attr_reader :time_file
+
+         def initialize(time_file)
+           if time_file <= 0
+             raise LogStash::ConfigurationError, "`time_file` needs to be greater than 0"
+           end
+
+           @time_file = time_file * 60 # time_file is expressed in minutes, convert it to seconds
+         end
+
+         def rotate?(file)
+           file.size > 0 && (Time.now - file.ctime) >= time_file
+         end
+
+         def needs_periodic?
+           true
+         end
+       end
+     end
+   end
+ end
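
Note that time_file is configured in minutes and converted to seconds, and that an empty file is never rotated by time. A minimal sketch with an illustrative stand-in file:

    require "ostruct"

    policy = LogStash::Outputs::S3::TimeRotationPolicy.new(15) # 15 minutes => 900 seconds
    stale  = OpenStruct.new(:size => 10, :ctime => Time.now - 901)
    empty  = OpenStruct.new(:size => 0,  :ctime => Time.now - 901)

    policy.rotate?(stale)   # => true, non-empty and older than 900 seconds
    policy.rotate?(empty)   # => false, empty files are never rotated
    policy.needs_periodic?  # => true, a timer must re-check even without new writes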
@@ -0,0 +1,84 @@
+ # encoding: utf-8
+ require "logstash/util"
+ require "aws-sdk"
+ require 'securerandom'
+ require 'zstd'
+
+ module LogStash
+   module Outputs
+     class S3
+       class Uploader
+
+         DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
+           :min_threads => 1,
+           :max_threads => 8,
+           :max_queue => 1,
+           :fallback_policy => :caller_runs
+         })
+
+         attr_reader :bucket, :upload_options, :logger
+
+         def initialize(bucket, logger, threadpool = DEFAULT_THREADPOOL, retry_count: Float::INFINITY, retry_delay: 1)
+           @bucket = bucket
+           @workers_pool = threadpool
+           @logger = logger
+           @retry_count = retry_count
+           @retry_delay = retry_delay
+         end
+
+         def upload_async(file, options = {})
+           @workers_pool.post do
+             LogStash::Util.set_thread_name("S3 output uploader, file: #{file.path}")
+             upload(file, options)
+           end
+         end
+
+         # compresses a TemporaryFile with zstd and uploads the result to S3
+         def upload(file, options = {})
+           upload_options = options.fetch(:upload_options, {})
+
+           # Compress next to the original temporary file; the compressed object is uploaded
+           # under a random key. Assumes Zstd.compress_file(src, dst) writes the destination path.
+           zstd_compressed_key = "#{SecureRandom.uuid}.json.zst"
+           zstd_compressed_path = ::File.join(::File.dirname(file.path), zstd_compressed_key)
+           Zstd.compress_file(file.path, zstd_compressed_path)
+
+           tries = 0
+           begin
+             obj = bucket.object(zstd_compressed_key)
+             obj.upload_file(zstd_compressed_path, upload_options)
+           rescue Errno::ENOENT => e
+             logger.error("File doesn't exist! Unrecoverable error.", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+           rescue => e
+             # When we get here, it usually means the AWS SDK already retried on its own (default is 3 attempts).
+             # When that retry limit is reached, or another error happens, we wait and retry ourselves.
+             #
+             # The thread might be stuck here, but that is better than losing data:
+             # it is either a transient error or something really bad happened.
+             if tries < @retry_count
+               tries += 1
+               logger.warn("Uploading failed, retrying (##{tries} of #{@retry_count})", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+               sleep @retry_delay
+               retry
+             else
+               logger.error("Failed to upload file (retried #{@retry_count} times).", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+             end
+           end
+
+           begin
+             # hand the original TemporaryFile back so the caller can clean up its sandbox
+             options[:on_complete].call(file) unless options[:on_complete].nil?
+           rescue => e
+             logger.error("An error occurred in the `on_complete` uploader", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
+             raise e # reraise it since we don't deal with it now
+           end
+         end
+
+         def stop
+           @workers_pool.shutdown
+           @workers_pool.wait_for_termination(nil) # block until it's done
+         end
+
+       end
+     end
+   end
+ end
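
A sketch of driving the uploader, assuming an Aws::S3::Bucket resource and a TemporaryFile (temp_file) produced by the factory above; the bucket name and options are illustrative. With :max_queue => 1 and the :caller_runs fallback, a saturated pool makes the calling thread run the upload itself, which applies backpressure instead of queueing without bound:

    require "aws-sdk"
    require "logger"

    bucket   = Aws::S3::Resource.new(:region => "us-east-1").bucket("my-bucket")
    uploader = LogStash::Outputs::S3::Uploader.new(bucket, Logger.new($stdout))

    uploader.upload_async(temp_file,
      :on_complete    => ->(f) { f.delete! },  # clean up the temp sandbox once uploaded
      :upload_options => { :server_side_encryption => "AES256" })

    uploader.stop # drain the worker pool on shutdown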
@@ -0,0 +1,19 @@
+ # encoding: utf-8
+ require "fileutils"
+
+ module LogStash
+   module Outputs
+     class S3
+       class WritableDirectoryValidator
+         def self.valid?(path)
+           begin
+             FileUtils.mkdir_p(path) unless Dir.exist?(path)
+             ::File.writable?(path)
+           rescue
+             false
+           end
+         end
+       end
+     end
+   end
+ end
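
Usage is a one-liner; the validator creates the directory when missing and maps any failure to false:

    LogStash::Outputs::S3::WritableDirectoryValidator.valid?("/tmp/logstash") # => true on most systems
    LogStash::Outputs::S3::WritableDirectoryValidator.valid?("/proc/nope")    # => false, mkdir_p raises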
@@ -0,0 +1,60 @@
+ # encoding: utf-8
+ require "stud/temporary"
+ require "socket"
+ require "fileutils"
+
+ module LogStash
+   module Outputs
+     class S3
+       class WriteBucketPermissionValidator
+         attr_reader :logger
+
+         def initialize(logger)
+           @logger = logger
+         end
+
+         def valid?(bucket_resource, upload_options = {})
+           begin
+             upload_test_file(bucket_resource, upload_options)
+             true
+           rescue StandardError => e
+             logger.error("Error validating bucket write permissions!",
+               :message => e.message,
+               :class => e.class.name,
+               :backtrace => e.backtrace
+             )
+             false
+           end
+         end
+
+         private
+         def upload_test_file(bucket_resource, upload_options = {})
+           generated_at = Time.now
+
+           key = "logstash-programmatic-access-test-object-#{generated_at}"
+           content = "Logstash permission check on #{generated_at}, by #{Socket.gethostname}"
+
+           begin
+             f = Stud::Temporary.file
+             f.write(content)
+             f.fsync
+
+             obj = bucket_resource.object(key)
+             obj.upload_file(f, upload_options)
+
+             begin
+               obj.delete
+             rescue
+               # Try to remove the test object from the remote bucket, but don't
+               # raise any error if that fails, since we only really need the
+               # PutObject permission for this check.
+             end
+           ensure
+             f.close
+             FileUtils.rm_rf(f.path)
+           end
+         end
+       end
+     end
+   end
+ end
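
And a sketch of the startup permission check, again assuming an Aws::S3::Bucket resource with an illustrative bucket name:

    require "aws-sdk"
    require "logger"

    bucket    = Aws::S3::Resource.new(:region => "us-east-1").bucket("my-bucket")
    validator = LogStash::Outputs::S3::WriteBucketPermissionValidator.new(Logger.new($stdout))

    validator.valid?(bucket) # true when a test object can be uploaded (delete is best effort)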