logstash-output-s3 3.2.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +21 -0
  3. data/lib/logstash/outputs/s3.rb +188 -308
  4. data/lib/logstash/outputs/s3/file_repository.rb +120 -0
  5. data/lib/logstash/outputs/s3/patch.rb +22 -0
  6. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  7. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  8. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  9. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  10. data/lib/logstash/outputs/s3/temporary_file_factory.rb +123 -0
  11. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  12. data/lib/logstash/outputs/s3/uploader.rb +59 -0
  13. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  14. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +49 -0
  15. data/logstash-output-s3.gemspec +2 -2
  16. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  17. data/spec/integration/gzip_file_spec.rb +62 -0
  18. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  19. data/spec/integration/restore_from_crash_spec.rb +39 -0
  20. data/spec/integration/size_rotation_spec.rb +59 -0
  21. data/spec/integration/stress_test_spec.rb +60 -0
  22. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  23. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +60 -0
  24. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  25. data/spec/outputs/s3/file_repository_spec.rb +146 -0
  26. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  27. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  28. data/spec/outputs/s3/temporary_file_factory_spec.rb +85 -0
  29. data/spec/outputs/s3/temporary_file_spec.rb +40 -0
  30. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  31. data/spec/outputs/s3/uploader_spec.rb +57 -0
  32. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  33. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +38 -0
  34. data/spec/outputs/s3_spec.rb +52 -335
  35. data/spec/spec_helper.rb +6 -0
  36. data/spec/supports/helpers.rb +33 -9
  37. metadata +65 -4
  38. data/spec/integration/s3_spec.rb +0 -97
@@ -0,0 +1,120 @@
+ # encoding: utf-8
+ require "java"
+ require "concurrent"
+ require "concurrent/timer_task"
+ require "logstash/util"
+
+ ConcurrentHashMap = java.util.concurrent.ConcurrentHashMap
+
+ module LogStash
+   module Outputs
+     class S3
+       class FileRepository
+         DEFAULT_STATE_SWEEPER_INTERVAL_SECS = 60
+         DEFAULT_STALE_TIME_SECS = 15 * 60
+
+         # Ensure that all access or work done
+         # on a factory is thread-safe
+         class PrefixedValue
+           def initialize(file_factory, stale_time)
+             @file_factory = file_factory
+             @lock = Mutex.new
+             @stale_time = stale_time
+           end
+
+           def with_lock
+             @lock.synchronize {
+               yield @file_factory
+             }
+           end
+
+           def stale?
+             with_lock { |factory| factory.current.size == 0 && (Time.now - factory.current.ctime > @stale_time) }
+           end
+
+           def apply(prefix)
+             return self
+           end
+
+           def delete!
+             with_lock { |factory| factory.current.delete! }
+           end
+         end
+
+         class FactoryInitializer
+           def initialize(tags, encoding, temporary_directory, stale_time)
+             @tags = tags
+             @encoding = encoding
+             @temporary_directory = temporary_directory
+             @stale_time = stale_time
+           end
+
+           def apply(prefix_key)
+             PrefixedValue.new(TemporaryFileFactory.new(prefix_key, @tags, @encoding, @temporary_directory), @stale_time)
+           end
+         end
+
+         def initialize(tags, encoding, temporary_directory,
+                        stale_time = DEFAULT_STALE_TIME_SECS,
+                        sweeper_interval = DEFAULT_STATE_SWEEPER_INTERVAL_SECS)
+           # The path needs to contain the prefix, so that when we restart
+           # Logstash after a crash we keep the remote structure.
+           @prefixed_factories = ConcurrentHashMap.new
+
+           @sweeper_interval = sweeper_interval
+
+           @factory_initializer = FactoryInitializer.new(tags, encoding, temporary_directory, stale_time)
+
+           start_stale_sweeper
+         end
+
+         def keys
+           @prefixed_factories.keySet
+         end
+
+         def each_files
+           @prefixed_factories.elements.each do |prefixed_file|
+             prefixed_file.with_lock { |factory| yield factory.current }
+           end
+         end
+
+         # Return the file factory
+         def get_factory(prefix_key)
+           @prefixed_factories.computeIfAbsent(prefix_key, @factory_initializer).with_lock { |factory| yield factory }
+         end
+
+         def get_file(prefix_key)
+           get_factory(prefix_key) { |factory| yield factory.current }
+         end
+
+         def shutdown
+           stop_stale_sweeper
+         end
+
+         def size
+           @prefixed_factories.size
+         end
+
+         def remove_stale(k, v)
+           if v.stale?
+             @prefixed_factories.remove(k, v)
+             v.delete!
+           end
+         end
+
+         def start_stale_sweeper
+           @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
+             LogStash::Util.set_thread_name("S3, Stale factory sweeper")
+
+             @prefixed_factories.forEach { |k, v| remove_stale(k, v) }
+           end
+
+           @stale_sweeper.execute
+         end
+
+         def stop_stale_sweeper
+           @stale_sweeper.shutdown
+         end
+       end
+     end
+   end
+ end
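
For orientation, a minimal usage sketch of the repository (not part of the diff; the tag list, encoding, prefixes, and temporary directory are made up for illustration). Each distinct prefix lazily gets its own TemporaryFileFactory via computeIfAbsent, and all work on a factory happens under that prefix's lock:

  require "logstash/outputs/s3/temporary_file"
  require "logstash/outputs/s3/temporary_file_factory"
  require "logstash/outputs/s3/file_repository"

  repository = LogStash::Outputs::S3::FileRepository.new(["es"], "none", "/tmp/logstash-s3")

  # Two prefixes map to two independent factories, hence two temporary files.
  repository.get_file("logs/2016/")    { |file| file.write("hello\n") }
  repository.get_file("metrics/2016/") { |file| file.write("42\n") }

  repository.size     # => 2
  repository.shutdown # stops the stale-factory sweeper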
@@ -0,0 +1,22 @@
+ # This is a patch related to autoloading and Ruby.
+ #
+ # The fix exists in JRuby 9k but not in the current JRuby; it is unclear when or if it will be backported.
+ # https://github.com/jruby/jruby/issues/3645
+ #
+ # AWS does tricky name discovery in the module to generate the correct error class, and
+ # this strategy is broken in JRuby; `eager_autoload` doesn't fix the issue.
+ #
+ # This will be a short-lived patch, since AWS is removing the need for it.
+ # see: https://github.com/aws/aws-sdk-ruby/issues/1301#issuecomment-261115960
+ require "stringio"
+
+ old_stderr = $stderr
+
+ # Force the S3 constant to resolve eagerly, silencing the redefinition warning.
+ $stderr = StringIO.new
+ begin
+   module Aws
+     const_set(:S3, Aws::S3)
+   end
+ ensure
+   $stderr = old_stderr
+ end
+
@@ -0,0 +1,18 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class PathValidator
+         INVALID_CHARACTERS = "\^`><"
+
+         def self.valid?(name)
+           name.match(matches_re).nil?
+         end
+
+         def self.matches_re
+           /[#{Regexp.escape(INVALID_CHARACTERS)}]/
+         end
+       end
+     end
+   end
+ end
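
A quick illustration (not part of the diff): the validator rejects a name only when it contains one of the four characters in INVALID_CHARACTERS.

  require "logstash/outputs/s3/path_validator"

  LogStash::Outputs::S3::PathValidator.valid?("logs/%{type}/") # => true
  LogStash::Outputs::S3::PathValidator.valid?("logs/<type>/")  # => false, "<" and ">" are rejected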
@@ -0,0 +1,24 @@
+ # encoding: utf-8
+ require "logstash/outputs/s3/size_rotation_policy"
+ require "logstash/outputs/s3/time_rotation_policy"
+
+ module LogStash
+   module Outputs
+     class S3
+       class SizeAndTimeRotationPolicy
+         def initialize(file_size, time_file)
+           @size_strategy = SizeRotationPolicy.new(file_size)
+           @time_strategy = TimeRotationPolicy.new(time_file)
+         end
+
+         def rotate?(file)
+           @size_strategy.rotate?(file) || @time_strategy.rotate?(file)
+         end
+
+         def needs_periodic?
+           true
+         end
+       end
+     end
+   end
+ end
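
A sketch of how the composite policy behaves (not part of the diff; `file` stands in for a TemporaryFile instance). Rotation triggers on whichever limit is hit first, and needs_periodic? tells the plugin to also evaluate the policy on a timer instead of only on writes:

  require "logstash/outputs/s3/size_and_time_rotation_policy"

  # Rotate at 1024 bytes or after 5 minutes, whichever comes first.
  policy = LogStash::Outputs::S3::SizeAndTimeRotationPolicy.new(1024, 5)

  policy.rotate?(file)   # => true once file.size >= 1024, or the file is
                         #    non-empty and older than 300 seconds
  policy.needs_periodic? # => true, because the time limit can expire between writes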
@@ -0,0 +1,26 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class SizeRotationPolicy
+         attr_reader :size_file
+
+         def initialize(size_file)
+           if size_file <= 0
+             raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
+           end
+
+           @size_file = size_file
+         end
+
+         def rotate?(file)
+           file.size >= size_file
+         end
+
+         def needs_periodic?
+           false
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,71 @@
+ # encoding: utf-8
+ require "thread"
+ require "forwardable"
+ require "fileutils"
+ require "pathname"
+
+ module LogStash
+   module Outputs
+     class S3
+       # Wrap the actual file descriptor in a utility class.
+       # It makes the code more OOP and easier to reason about paths.
+       class TemporaryFile
+         extend Forwardable
+
+         def_delegators :@fd, :path, :write, :close, :fsync
+
+         attr_reader :fd
+
+         def initialize(key, fd, temp_path)
+           @fd = fd
+           @key = key
+           @temp_path = temp_path
+           @created_at = Time.now
+         end
+
+         def ctime
+           @created_at
+         end
+
+         def temp_path
+           @temp_path
+         end
+
+         def size
+           # Use the fd size to get an accurate result,
+           # so we don't have to deal with fsync;
+           # if the file is closed we use File::size instead.
+           begin
+             @fd.size
+           rescue IOError
+             ::File.size(path)
+           end
+         end
+
+         def key
+           @key.gsub(/^\//, "")
+         end
+
+         # Each temporary file is created inside a directory named with a UUID.
+         # Instead of deleting the file directly, with the risk of deleting other files,
+         # we delete the root of the UUID. Using a UUID also removes the risk of
+         # deleting unwanted files; it acts as a sandbox.
+         def delete!
+           @fd.close
+           ::FileUtils.rm_rf(@temp_path, :secure => true)
+         end
+
+         def empty?
+           size == 0
+         end
+
+         def self.create_from_existing_file(file_path, temporary_folder)
+           key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)
+
+           TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
+                             ::File.open(file_path, "r"),
+                             ::File.join(temporary_folder, key_parts.slice(0, 1)))
+         end
+       end
+     end
+   end
+ end
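
The crash-recovery path is easiest to see with an example (not part of the diff; the UUID and paths are invented). Given a file left over under the temporary directory, create_from_existing_file rebuilds the S3 key by dropping the UUID sandbox segment:

  require "logstash/outputs/s3/temporary_file"

  file = LogStash::Outputs::S3::TemporaryFile.create_from_existing_file(
    "/tmp/logstash-s3/0cdc4d66-4878-4946-a05c-55f1f9fcf6d6/logs/2016/ls.s3.host.part0.txt",
    "/tmp/logstash-s3")

  file.key       # => "logs/2016/ls.s3.host.part0.txt"
  file.temp_path # => "/tmp/logstash-s3/0cdc4d66-4878-4946-a05c-55f1f9fcf6d6"
  file.delete!   # removes the whole UUID sandbox directory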
@@ -0,0 +1,123 @@
+ # encoding: utf-8
+ require "socket"
+ require "securerandom"
+ require "fileutils"
+ require "zlib"
+ require "forwardable"
+
+ module LogStash
+   module Outputs
+     class S3
+       # Since the file can contain dynamic parts, we have to keep a more local structure
+       # to allow a clean recovery from a crash.
+       #
+       # The local structure will look like this:
+       #
+       # <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%M.tag_es_fb.part1.txt.gz
+       #
+       # Since the UUID should be unique, we can destroy the whole path when an upload is
+       # complete without checking whether other directories still have files in them.
+       class TemporaryFileFactory
+         FILE_MODE = "a"
+         GZIP_ENCODING = "gzip"
+         GZIP_EXTENSION = "txt.gz"
+         TXT_EXTENSION = "txt"
+         STRFTIME = "%Y-%m-%dT%H.%M"
+
+         attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current
+
+         def initialize(prefix, tags, encoding, temporary_directory)
+           @counter = 0
+           @prefix = prefix
+
+           @tags = tags
+           @encoding = encoding
+           @temporary_directory = temporary_directory
+           @lock = Mutex.new
+
+           rotate!
+         end
+
+         def rotate!
+           @lock.synchronize {
+             @current = new_file
+             increment_counter
+             @current
+           }
+         end
+
+         private
+
+         def extension
+           gzip? ? GZIP_EXTENSION : TXT_EXTENSION
+         end
+
+         def gzip?
+           encoding == GZIP_ENCODING
+         end
+
+         def increment_counter
+           @counter += 1
+         end
+
+         def current_time
+           Time.now.strftime(STRFTIME)
+         end
+
+         def generate_name
+           filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"
+
+           if tags.size > 0
+             "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
+           else
+             "#{filename}.part#{counter}.#{extension}"
+           end
+         end
+
+         def new_file
+           uuid = SecureRandom.uuid
+           name = generate_name
+           path = ::File.join(temporary_directory, uuid)
+           key = ::File.join(prefix, name)
+
+           FileUtils.mkdir_p(::File.join(path, prefix))
+
+           io = if gzip?
+             # We have to use this wrapper because we cannot access the size of the
+             # file directly on the gzip writer.
+             IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
+           else
+             ::File.open(::File.join(path, key), FILE_MODE)
+           end
+
+           TemporaryFile.new(key, io, path)
+         end
+
+         class IOWrappedGzip
+           extend Forwardable
+
+           def_delegators :@gzip_writer, :write, :close
+           attr_reader :file_io, :gzip_writer
+
+           def initialize(file_io)
+             @file_io = file_io
+             @gzip_writer = Zlib::GzipWriter.open(file_io)
+           end
+
+           def path
+             @gzip_writer.to_io.path
+           end
+
+           def size
+             # flush to get the current file size
+             @gzip_writer.flush
+             @gzip_writer.to_io.size
+           end
+
+           def fsync
+             @gzip_writer.to_io.fsync
+           end
+         end
+       end
+     end
+   end
+ end
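
A short sketch of the factory in use (not part of the diff; the temporary directory is hypothetical). Every rotation creates a fresh file inside a new UUID sandbox and bumps the part counter embedded in the name:

  require "logstash/outputs/s3/temporary_file"
  require "logstash/outputs/s3/temporary_file_factory"

  factory = LogStash::Outputs::S3::TemporaryFileFactory.new("logs/", ["es"], "gzip", "/tmp/logstash-s3")
  factory.current.path # => /tmp/logstash-s3/<uuid>/logs/ls.s3.<uuid>.<time>.tag_es.part0.txt.gz

  factory.rotate!      # thread-safe; the next file carries part1 in its name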
@@ -0,0 +1,26 @@
+ # encoding: utf-8
+ module LogStash
+   module Outputs
+     class S3
+       class TimeRotationPolicy
+         attr_reader :time_file
+
+         def initialize(time_file)
+           if time_file <= 0
+             raise LogStash::ConfigurationError, "`time_file` needs to be greater than 0"
+           end
+
+           @time_file = time_file * 60
+         end
+
+         def rotate?(file)
+           file.size > 0 && (Time.now - file.ctime) >= time_file
+         end
+
+         def needs_periodic?
+           true
+         end
+       end
+     end
+   end
+ end
@@ -0,0 +1,59 @@
+ # encoding: utf-8
+ require "concurrent"
+ require "logstash/util"
+ require "aws-sdk"
+
+ module LogStash
+   module Outputs
+     class S3
+       class Uploader
+         TIME_BEFORE_RETRYING_SECONDS = 1
+         DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
+           :min_threads => 1,
+           :max_threads => 8,
+           :max_queue => 1,
+           :fallback_policy => :caller_runs
+         })
+
+         attr_reader :bucket, :upload_options, :logger
+
+         def initialize(bucket, logger, threadpool = DEFAULT_THREADPOOL)
+           @bucket = bucket
+           @workers_pool = threadpool
+           @logger = logger
+         end
+
+         def upload_async(file, options = {})
+           @workers_pool.post do
+             LogStash::Util.set_thread_name("S3 output uploader, file: #{file.path}")
+             upload(file, options)
+           end
+         end
+
+         def upload(file, options = {})
+           upload_options = options.fetch(:upload_options, {})
+
+           begin
+             obj = bucket.object(file.key)
+             obj.upload_file(file.path, upload_options)
+           rescue => e
+             # When we get here it usually means that the S3 client already retried by
+             # itself (the default is 3 attempts). When its retry limit is reached, or
+             # another error happens, we wait and retry indefinitely.
+             #
+             # The thread might get stuck here, but that is better than losing data;
+             # it is either a transient error or something really bad happened.
+             logger.error("Uploading failed, retrying", :exception => e, :path => file.path, :backtrace => e.backtrace)
+             sleep(TIME_BEFORE_RETRYING_SECONDS)
+             retry
+           end
+
+           options[:on_complete].call(file) unless options[:on_complete].nil?
+         end
+
+         def stop
+           @workers_pool.shutdown
+           @workers_pool.wait_for_termination(nil) # block until it's done
+         end
+       end
+     end
+   end
+ end
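
Finally, a hedged usage sketch of the uploader (not part of the diff; the bucket name, logger, and file are placeholders, and credentials are assumed to come from the environment). Uploads are posted to the thread pool; because max_queue is 1 with a :caller_runs fallback, a saturated pool applies back-pressure by running the upload on the calling thread:

  require "aws-sdk"
  require "logstash/outputs/s3/uploader"

  bucket = Aws::S3::Bucket.new("my-bucket") # hypothetical bucket
  uploader = LogStash::Outputs::S3::Uploader.new(bucket, logger)

  uploader.upload_async(file,
                        :upload_options => { :server_side_encryption => "AES256" },
                        :on_complete => ->(f) { f.delete! }) # clean the sandbox once uploaded

  uploader.stop # drain the pool on shutdown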