logstash-output-s3 3.2.0 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +21 -0
  3. data/lib/logstash/outputs/s3.rb +188 -308
  4. data/lib/logstash/outputs/s3/file_repository.rb +120 -0
  5. data/lib/logstash/outputs/s3/patch.rb +22 -0
  6. data/lib/logstash/outputs/s3/path_validator.rb +18 -0
  7. data/lib/logstash/outputs/s3/size_and_time_rotation_policy.rb +24 -0
  8. data/lib/logstash/outputs/s3/size_rotation_policy.rb +26 -0
  9. data/lib/logstash/outputs/s3/temporary_file.rb +71 -0
  10. data/lib/logstash/outputs/s3/temporary_file_factory.rb +123 -0
  11. data/lib/logstash/outputs/s3/time_rotation_policy.rb +26 -0
  12. data/lib/logstash/outputs/s3/uploader.rb +59 -0
  13. data/lib/logstash/outputs/s3/writable_directory_validator.rb +17 -0
  14. data/lib/logstash/outputs/s3/write_bucket_permission_validator.rb +49 -0
  15. data/logstash-output-s3.gemspec +2 -2
  16. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  17. data/spec/integration/gzip_file_spec.rb +62 -0
  18. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  19. data/spec/integration/restore_from_crash_spec.rb +39 -0
  20. data/spec/integration/size_rotation_spec.rb +59 -0
  21. data/spec/integration/stress_test_spec.rb +60 -0
  22. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  23. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +60 -0
  24. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  25. data/spec/outputs/s3/file_repository_spec.rb +146 -0
  26. data/spec/outputs/s3/size_and_time_rotation_policy_spec.rb +77 -0
  27. data/spec/outputs/s3/size_rotation_policy_spec.rb +41 -0
  28. data/spec/outputs/s3/temporary_file_factory_spec.rb +85 -0
  29. data/spec/outputs/s3/temporary_file_spec.rb +40 -0
  30. data/spec/outputs/s3/time_rotation_policy_spec.rb +60 -0
  31. data/spec/outputs/s3/uploader_spec.rb +57 -0
  32. data/spec/outputs/s3/writable_directory_validator_spec.rb +40 -0
  33. data/spec/outputs/s3/write_bucket_permission_validator_spec.rb +38 -0
  34. data/spec/outputs/s3_spec.rb +52 -335
  35. data/spec/spec_helper.rb +6 -0
  36. data/spec/supports/helpers.rb +33 -9
  37. metadata +65 -4
  38. data/spec/integration/s3_spec.rb +0 -97
# encoding: utf-8
require "java"
require "concurrent"
require "concurrent/timer_task"
require "logstash/util"

# JRuby-only: back the repository with Java's lock-free concurrent map.
ConcurrentHashMap = java.util.concurrent.ConcurrentHashMap

module LogStash
  module Outputs
    class S3
      # Holds one TemporaryFileFactory per S3 prefix and runs a background
      # sweeper that evicts factories whose current file has stayed empty
      # past the stale threshold.
      class FileRepository
        # How often (seconds) the background sweeper scans for stale factories.
        DEFAULT_STATE_SWEEPER_INTERVAL_SECS = 60
        # A factory is stale when its current file is empty and older than this.
        DEFAULT_STALE_TIME_SECS = 15 * 60
        # Ensure that all access or work done
        # on a factory is threadsafe
        class PrefixedValue
          def initialize(file_factory, stale_time)
            @file_factory = file_factory
            @lock = Mutex.new
            @stale_time = stale_time
          end

          # Yields the wrapped factory while holding this value's mutex.
          def with_lock
            @lock.synchronize {
              yield @file_factory
            }
          end

          # True when the current file is empty AND older than @stale_time seconds.
          def stale?
            with_lock { |factory| factory.current.size == 0 && (Time.now - factory.current.ctime > @stale_time) }
          end

          # Identity mapping in java.util.function style, mirroring
          # FactoryInitializer#apply — presumably so an existing value can be
          # supplied where a mapping function is expected; TODO confirm caller.
          def apply(prefix)
            return self
          end

          # Close and remove the factory's current on-disk file (under the lock).
          def delete!
            with_lock{ |factory| factory.current.delete! }
          end
        end

        # Mapping function handed to ConcurrentHashMap#computeIfAbsent:
        # lazily builds the per-prefix PrefixedValue on first access.
        class FactoryInitializer
          def initialize(tags, encoding, temporary_directory, stale_time)
            @tags = tags
            @encoding = encoding
            @temporary_directory = temporary_directory
            @stale_time = stale_time
          end

          # prefix_key - the S3 key prefix this factory will write under.
          def apply(prefix_key)
            PrefixedValue.new(TemporaryFileFactory.new(prefix_key, @tags, @encoding, @temporary_directory), @stale_time)
          end
        end

        def initialize(tags, encoding, temporary_directory,
                       stale_time = DEFAULT_STALE_TIME_SECS,
                       sweeper_interval = DEFAULT_STATE_SWEEPER_INTERVAL_SECS)
          # The path needs to contain the prefix so when we start
          # logstash after a crash we keep the remote structure
          @prefixed_factories = ConcurrentHashMap.new

          @sweeper_interval = sweeper_interval

          @factory_initializer = FactoryInitializer.new(tags, encoding, temporary_directory, stale_time)

          start_stale_sweeper
        end

        # All known prefixes — a live java.util.Set view of the map's keys.
        def keys
          @prefixed_factories.keySet
        end

        # Yields every factory's current TemporaryFile, each under its own lock.
        def each_files
          @prefixed_factories.elements.each do |prefixed_file|
            prefixed_file.with_lock { |factory| yield factory.current }
          end
        end

        # Return the file factory
        # Atomically creates the factory for prefix_key on first use
        # (computeIfAbsent), then yields it under its lock.
        def get_factory(prefix_key)
          @prefixed_factories.computeIfAbsent(prefix_key, @factory_initializer).with_lock { |factory| yield factory }
        end

        # Yields the current TemporaryFile for the prefix, creating the
        # factory if needed.
        def get_file(prefix_key)
          get_factory(prefix_key) { |factory| yield factory.current }
        end

        def shutdown
          stop_stale_sweeper
        end

        # Number of tracked prefixes.
        def size
          @prefixed_factories.size
        end

        # Evicts and deletes the entry when stale. remove(k, v) only succeeds
        # while the mapping is still v, so a concurrent writer that swapped in
        # a fresh value is not clobbered.
        def remove_stale(k, v)
          if v.stale?
            @prefixed_factories.remove(k, v)
            v.delete!
          end
        end

        # Starts the background timer task that periodically sweeps stale
        # factories every @sweeper_interval seconds.
        def start_stale_sweeper
          @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
            LogStash::Util.set_thread_name("S3, Stale factory sweeper")

            @prefixed_factories.forEach{|k,v| remove_stale(k,v)}
          end

          @stale_sweeper.execute
        end

        def stop_stale_sweeper
          @stale_sweeper.shutdown
        end
      end
    end
  end
end
# This is a patch related to autoloading and Ruby.
#
# The fix exists in JRuby 9k but not in the current JRuby; not sure when or if it will be backported.
# https://github.com/jruby/jruby/issues/3645
#
# AWS is doing tricky name discovery in the module to generate the correct error class and
# this strategy is bogus in JRuby, and `eager_autoload` doesn't fix this issue.
#
# This will be a short-lived patch since AWS is removing the need.
# see: https://github.com/aws/aws-sdk-ruby/issues/1301#issuecomment-261115960

# StringIO is referenced below; require it explicitly instead of relying on
# another file having loaded it first.
require "stringio"

old_stderr = $stderr

# Force-resolve Aws::S3 while silencing the constant-redefinition warning
# that const_set emits; always restore the real $stderr afterwards.
$stderr = StringIO.new
begin
  module Aws
    const_set(:S3, Aws::S3)
  end
ensure
  $stderr = old_stderr
end
# encoding: utf-8
module LogStash
  module Outputs
    class S3
      # Validates that a prefix/path contains none of the characters that
      # are unsafe in S3 object keys.
      class PathValidator
        INVALID_CHARACTERS = "\^`><"

        # Returns true when +name+ contains no invalid character.
        def self.valid?(name)
          !matches_re.match(name)
        end

        # Regexp matching any single invalid character.
        def self.matches_re
          /[#{Regexp.escape(INVALID_CHARACTERS)}]/
        end
      end
    end
  end
end
# encoding: utf-8
require "logstash/outputs/s3/size_rotation_policy"
require "logstash/outputs/s3/time_rotation_policy"

module LogStash
  module Outputs
    class S3
      # Composite rotation policy: the file rotates as soon as EITHER the
      # size limit or the time limit is reached.
      class SizeAndTimeRotationPolicy
        def initialize(file_size, time_file)
          @size_strategy = SizeRotationPolicy.new(file_size)
          @time_strategy = TimeRotationPolicy.new(time_file)
        end

        # file - any object responding to #size and #ctime.
        # True when either delegate wants a rotation (short-circuits).
        def rotate?(file)
          [@size_strategy, @time_strategy].any? { |strategy| strategy.rotate?(file) }
        end

        # The time-based component can fire without new writes, so a
        # periodic check is required.
        def needs_periodic?
          true
        end
      end
    end
  end
end
# encoding: utf-8
module LogStash
  module Outputs
    class S3
      # Rotation policy that triggers once the temporary file reaches a
      # configured size (in bytes).
      class SizeRotationPolicy
        attr_reader :size_file

        # size_file - maximum file size in bytes; must be strictly positive.
        #
        # Raises LogStash::ConfigurationError when the value is invalid.
        def initialize(size_file)
          if size_file <= 0
            # Fixed typo/grammar in the user-facing message ("need to be greather").
            raise LogStash::ConfigurationError, "`size_file` needs to be greater than 0"
          end

          @size_file = size_file
        end

        # file - any object responding to #size (a TemporaryFile).
        def rotate?(file)
          file.size >= size_file
        end

        # Size only changes on a write, so no timer-driven check is needed.
        def needs_periodic?
          false
        end
      end
    end
  end
end
# encoding: utf-8
require "thread"
require "forwardable"
require "fileutils"

module LogStash
  module Outputs
    class S3
      # OO wrapper around the file descriptor of a temporary upload file.
      # Keeps the S3 key and the on-disk sandbox path together with the IO,
      # which makes reasoning about paths easier.
      class TemporaryFile
        extend Forwardable

        def_delegators :@fd, :path, :write, :close, :fsync

        attr_reader :fd
        # Root of the per-file UUID sandbox directory on disk.
        attr_reader :temp_path

        def initialize(key, fd, temp_path)
          @fd = fd
          @key = key
          @temp_path = temp_path
          @created_at = Time.now
        end

        # Creation time of this wrapper (not the filesystem ctime).
        def ctime
          @created_at
        end

        # Current size in bytes. Ask the live descriptor first so we get an
        # accurate answer without fsync'ing; once the descriptor is closed,
        # fall back to the size recorded on the filesystem.
        def size
          @fd.size
        rescue IOError
          ::File.size(path)
        end

        # The S3 object key: the stored key without its leading slash.
        def key
          @key.gsub(/^\//, "")
        end

        # Each temporary file lives inside a directory named with a UUID.
        # Rather than deleting the file itself (and risking touching other
        # files), we remove the whole UUID root — the UUID acts as a sandbox.
        def delete!
          @fd.close
          ::FileUtils.rm_rf(@temp_path, :secure => true)
        end

        def empty?
          size.zero?
        end

        # Rebuilds a TemporaryFile from a file left on disk (e.g. after a
        # crash). NOTE(review): relies on Pathname being loaded elsewhere —
        # this file does not require "pathname" itself.
        def self.create_from_existing_file(file_path, temporary_folder)
          parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)

          # parts[0] is the UUID sandbox directory; the remainder forms the key.
          TemporaryFile.new(parts.slice(1, parts.size).join("/"),
                            ::File.open(file_path, "r"),
                            ::File.join(temporary_folder, parts.slice(0, 1)))
        end
      end
    end
  end
end
# encoding: utf-8
require "socket"
require "securerandom"
require "fileutils"
require "zlib"
require "forwardable"

module LogStash
  module Outputs
    class S3
      # Since the file can contain dynamic parts, we keep a local structure
      # that allows a clean recovery from a crash.
      #
      # The local structure looks like this:
      #
      # <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%m.tag_es_fb.part1.txt.gz
      #
      # Since the UUID should be fairly unique, the whole path can be destroyed
      # when an upload completes, without checking whether other directories
      # still contain files.
      class TemporaryFileFactory
        FILE_MODE = "a"
        GZIP_ENCODING = "gzip"
        GZIP_EXTENSION = "txt.gz"
        TXT_EXTENSION = "txt"
        STRFTIME = "%Y-%m-%dT%H.%M"

        attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current

        def initialize(prefix, tags, encoding, temporary_directory)
          @counter = 0
          @prefix = prefix

          @tags = tags
          @encoding = encoding
          @temporary_directory = temporary_directory
          @lock = Mutex.new

          rotate!
        end

        # Atomically swaps in a fresh file and bumps the part counter.
        # Returns the new current TemporaryFile.
        def rotate!
          @lock.synchronize {
            @current = new_file
            increment_counter
            @current
          }
        end

        private
        def extension
          gzip? ? GZIP_EXTENSION : TXT_EXTENSION
        end

        def gzip?
          encoding == GZIP_ENCODING
        end

        def increment_counter
          @counter += 1
        end

        def current_time
          Time.now.strftime(STRFTIME)
        end

        # Builds the file name: ls.s3.<uuid>.<timestamp>[.tag_<tags>].part<N>.<ext>
        # BUGFIX: the interpolation of `filename` was garbled ("#(unknown)"),
        # leaving the local variable unused and producing broken names.
        def generate_name
          filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"

          if tags.size > 0
            "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
          else
            "#{filename}.part#{counter}.#{extension}"
          end
        end

        # Creates <temporary_directory>/<uuid>/<prefix>/<generated name> on
        # disk and wraps it in a TemporaryFile.
        def new_file
          uuid = SecureRandom.uuid
          name = generate_name
          path = ::File.join(temporary_directory, uuid)
          key = ::File.join(prefix, name)

          FileUtils.mkdir_p(::File.join(path, prefix))

          io = if gzip?
                 # We have to use this wrapper because we cannot access the size of the
                 # file directly on the gzip writer.
                 IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
               else
                 ::File.open(::File.join(path, key), FILE_MODE)
               end

          TemporaryFile.new(key, io, path)
        end

        # Gives a gzip stream the small IO surface TemporaryFile relies on
        # (write/close/path/size/fsync), delegating to the underlying file.
        class IOWrappedGzip
          extend Forwardable

          def_delegators :@gzip_writer, :write, :close
          attr_reader :file_io, :gzip_writer

          def initialize(file_io)
            @file_io = file_io
            @gzip_writer = Zlib::GzipWriter.open(file_io)
          end

          def path
            @gzip_writer.to_io.path
          end

          # Flush first so the on-disk size reflects what has been written.
          def size
            @gzip_writer.flush
            @gzip_writer.to_io.size
          end

          def fsync
            @gzip_writer.to_io.fsync
          end
        end
      end
    end
  end
end
# encoding: utf-8
module LogStash
  module Outputs
    class S3
      # Rotation policy that triggers after a fixed amount of time, provided
      # the file actually has content.
      class TimeRotationPolicy
        attr_reader :time_file

        # time_file - rotation interval in minutes; must be strictly positive.
        #
        # Raises LogStash::ConfigurationError when the value is invalid.
        def initialize(time_file)
          if time_file <= 0
            # Fixed typo/grammar in the user-facing message ("need to be greather").
            raise LogStash::ConfigurationError, "`time_file` needs to be greater than 0"
          end

          # Stored internally in seconds.
          @time_file = time_file * 60
        end

        # file - any object responding to #size and #ctime.
        # Rotate only when the file is non-empty and older than the interval.
        def rotate?(file)
          file.size > 0 && (Time.now - file.ctime) >= time_file
        end

        # Time passes without writes, so a periodic check is required.
        def needs_periodic?
          true
        end
      end
    end
  end
end
# encoding: utf-8
require "logstash/util"
require "aws-sdk"

module LogStash
  module Outputs
    class S3
      # Uploads finished temporary files to S3 on a bounded worker pool.
      class Uploader
        TIME_BEFORE_RETRYING_SECONDS = 1
        # max_queue of 1 with :caller_runs means a saturated pool pushes the
        # upload back onto the calling thread, providing natural back-pressure.
        DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
                                                                  :min_threads => 1,
                                                                  :max_threads => 8,
                                                                  :max_queue => 1,
                                                                  :fallback_policy => :caller_runs
                                                                })


        attr_reader :bucket, :upload_options, :logger

        # bucket     - Aws::S3::Bucket resource to upload into.
        # logger     - logger used to report upload failures.
        # threadpool - executor to run uploads on (defaults to the shared pool).
        def initialize(bucket, logger, threadpool = DEFAULT_THREADPOOL)
          @bucket = bucket
          @workers_pool = threadpool
          @logger = logger
        end

        # Schedules an upload on the worker pool and returns immediately.
        # options are forwarded to #upload.
        def upload_async(file, options = {})
          @workers_pool.post do
            LogStash::Util.set_thread_name("S3 output uploader, file: #{file.path}")
            upload(file, options)
          end
        end

        # Uploads the file synchronously, retrying forever on failure, then
        # invokes options[:on_complete] (if given) with the file.
        def upload(file, options = {})
          upload_options = options.fetch(:upload_options, {})

          begin
            obj = bucket.object(file.key)
            obj.upload_file(file.path, upload_options)
          rescue => e
            # When we get here it usually means the SDK already retried
            # internally (default is 3 attempts). We deliberately wait and
            # retry forever rather than lose data — it's either a transient
            # error or something really bad happened.
            logger.error("Uploading failed, retrying", :exception => e, :path => file.path, :backtrace => e.backtrace)
            # BUGFIX: TIME_BEFORE_RETRYING_SECONDS was defined but never used,
            # so a persistent failure busy-spun the worker thread. Back off
            # between attempts.
            sleep(TIME_BEFORE_RETRYING_SECONDS)
            retry
          end

          options[:on_complete].call(file) unless options[:on_complete].nil?
        end

        # Stops accepting work and blocks until in-flight uploads finish.
        def stop
          @workers_pool.shutdown
          @workers_pool.wait_for_termination(nil) # block until its done
        end
      end
    end
  end
end