logstash-output-swift 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +2 -0
  3. data/CONTRIBUTORS +22 -0
  4. data/DEVELOPER.md +15 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE +201 -0
  7. data/NOTICE.TXT +2 -0
  8. data/README.md +38 -0
  9. data/lib/logstash/outputs/swift.rb +360 -0
  10. data/lib/logstash/outputs/swift/file_repository.rb +121 -0
  11. data/lib/logstash/outputs/swift/path_validator.rb +18 -0
  12. data/lib/logstash/outputs/swift/size_and_time_rotation_policy.rb +24 -0
  13. data/lib/logstash/outputs/swift/size_rotation_policy.rb +26 -0
  14. data/lib/logstash/outputs/swift/temporary_file.rb +71 -0
  15. data/lib/logstash/outputs/swift/temporary_file_factory.rb +129 -0
  16. data/lib/logstash/outputs/swift/time_rotation_policy.rb +26 -0
  17. data/lib/logstash/outputs/swift/uploader.rb +64 -0
  18. data/lib/logstash/outputs/swift/writable_directory_validator.rb +17 -0
  19. data/lib/logstash/outputs/swift/write_container_permission_validator.rb +52 -0
  20. data/logstash-output-swift.gemspec +28 -0
  21. data/spec/integration/dynamic_prefix_spec.rb +92 -0
  22. data/spec/integration/gzip_file_spec.rb +62 -0
  23. data/spec/integration/gzip_size_rotation_spec.rb +63 -0
  24. data/spec/integration/restore_from_crash_spec.rb +39 -0
  25. data/spec/integration/size_rotation_spec.rb +59 -0
  26. data/spec/integration/stress_test_spec.rb +60 -0
  27. data/spec/integration/time_based_rotation_with_constant_write_spec.rb +60 -0
  28. data/spec/integration/time_based_rotation_with_stale_write_spec.rb +64 -0
  29. data/spec/integration/upload_current_file_on_shutdown_spec.rb +51 -0
  30. data/spec/outputs/swift/file_repository_spec.rb +143 -0
  31. data/spec/outputs/swift/size_and_time_rotation_policy_spec.rb +77 -0
  32. data/spec/outputs/swift/size_rotation_policy_spec.rb +41 -0
  33. data/spec/outputs/swift/temporary_file_factory_spec.rb +89 -0
  34. data/spec/outputs/swift/temporary_file_spec.rb +47 -0
  35. data/spec/outputs/swift/time_rotation_policy_spec.rb +60 -0
  36. data/spec/outputs/swift/uploader_spec.rb +49 -0
  37. data/spec/outputs/swift/writable_directory_validator_spec.rb +40 -0
  38. data/spec/outputs/swift/write_container_permission_validator_spec.rb +38 -0
  39. data/spec/outputs/swift_spec.rb +76 -0
  40. data/spec/spec_helper.rb +48 -0
  41. data/spec/supports/helpers.rb +38 -0
  42. metadata +212 -0
@@ -0,0 +1,121 @@
1
+ # encoding: utf-8
2
+ require "java"
3
+ require "concurrent"
4
+ require "concurrent/timer_task"
5
+ require "logstash/util"
6
+
7
ConcurrentHashMap = java.util.concurrent.ConcurrentHashMap

module LogStash
  module Outputs
    class Swift
      # Maps each prefix (the dynamic part of the object key) to the
      # TemporaryFileFactory producing local files for it, and periodically
      # sweeps entries whose current file has stayed empty past the stale time.
      class FileRepository
        DEFAULT_STATE_SWEEPER_INTERVAL_SECS = 60
        DEFAULT_STALE_TIME_SECS = 15 * 60

        # Ensure that all access or work done
        # on a factory is threadsafe
        class PrefixedValue
          def initialize(file_factory, stale_time)
            @file_factory = file_factory
            @lock = Mutex.new
            @stale_time = stale_time
          end

          # Yields the wrapped factory while holding the mutex.
          def with_lock
            @lock.synchronize {
              yield @file_factory
            }
          end

          # Stale when the current file is empty and older than @stale_time.
          def stale?
            with_lock { |factory| factory.current.size == 0 && (Time.now - factory.current.ctime > @stale_time) }
          end

          # java.util.function.Function contract used by computeIfAbsent:
          # an already-present value is returned unchanged.
          def apply(prefix)
            return self
          end

          def delete!
            with_lock { |factory| factory.current.delete! }
          end
        end

        # Mapping function for ConcurrentHashMap#computeIfAbsent: builds the
        # PrefixedValue for a prefix on first access.
        class FactoryInitializer
          include java.util.function.Function

          def initialize(tags, encoding, temporary_directory, stale_time)
            @tags = tags
            @encoding = encoding
            @temporary_directory = temporary_directory
            @stale_time = stale_time
          end

          def apply(prefix_key)
            PrefixedValue.new(TemporaryFileFactory.new(prefix_key, @tags, @encoding, @temporary_directory), @stale_time)
          end
        end

        def initialize(tags, encoding, temporary_directory,
                       stale_time = DEFAULT_STALE_TIME_SECS,
                       sweeper_interval = DEFAULT_STATE_SWEEPER_INTERVAL_SECS)
          # The path needs to contain the prefix so that when we restart
          # logstash after a crash we keep the remote structure.
          @prefixed_factories = ConcurrentHashMap.new

          @sweeper_interval = sweeper_interval

          @factory_initializer = FactoryInitializer.new(tags, encoding, temporary_directory, stale_time)

          start_stale_sweeper
        end

        def keys
          @prefixed_factories.keySet
        end

        # Yields the current temporary file of every known factory.
        def each_files
          @prefixed_factories.elements.each do |prefixed_file|
            prefixed_file.with_lock { |factory| yield factory.current }
          end
        end

        # Return the file factory
        def get_factory(prefix_key)
          @prefixed_factories.computeIfAbsent(prefix_key, @factory_initializer).with_lock { |factory| yield factory }
        end

        def get_file(prefix_key)
          get_factory(prefix_key) { |factory| yield factory.current }
        end

        def shutdown
          stop_stale_sweeper
        end

        def size
          @prefixed_factories.size
        end

        # Removes and deletes the entry for `k` when it has gone stale.
        def remove_stale(k, v)
          if v.stale?
            @prefixed_factories.remove(k, v)
            v.delete!
          end
        end

        def start_stale_sweeper
          @stale_sweeper = Concurrent::TimerTask.new(:execution_interval => @sweeper_interval) do
            # NOTE: thread name previously said "S3" — leftover from the
            # logstash-output-s3 plugin this code derives from.
            LogStash::Util.set_thread_name("Swift, Stale factory sweeper")

            @prefixed_factories.forEach { |k, v| remove_stale(k, v) }
          end

          @stale_sweeper.execute
        end

        def stop_stale_sweeper
          @stale_sweeper.shutdown
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
module LogStash
  module Outputs
    class Swift
      # Validates that a prefix/path contains none of the characters that
      # are rejected for Swift object names.
      class PathValidator
        INVALID_CHARACTERS = "\^`><"

        # Returns true when +name+ contains no invalid character.
        def self.valid?(name)
          (name =~ matches_re).nil?
        end

        # Character class matching any single invalid character.
        def self.matches_re
          %r{[#{Regexp.escape(INVALID_CHARACTERS)}]}
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+ require "logstash/outputs/swift/size_rotation_policy"
3
+ require "logstash/outputs/swift/time_rotation_policy"
4
+
5
module LogStash
  module Outputs
    class Swift
      # Composite policy: rotate as soon as EITHER the size limit OR the
      # time limit is reached.
      class SizeAndTimeRotationPolicy
        def initialize(file_size, time_file)
          @size_strategy = SizeRotationPolicy.new(file_size)
          @time_strategy = TimeRotationPolicy.new(time_file)
        end

        # True when any underlying strategy wants a rotation.
        def rotate?(file)
          [@size_strategy, @time_strategy].any? { |strategy| strategy.rotate?(file) }
        end

        # The time component requires a periodic check even without writes.
        def needs_periodic?
          true
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # encoding: utf-8
2
module LogStash
  module Outputs
    class Swift
      # Rotation policy triggering once the temporary file reaches a fixed
      # size in bytes.
      class SizeRotationPolicy
        attr_reader :size_file

        # size_file - maximum file size in bytes; must be strictly positive.
        # Raises LogStash::ConfigurationError otherwise.
        def initialize(size_file)
          if size_file <= 0
            # Fixed typo in the error message ("greather" -> "greater").
            raise LogStash::ConfigurationError, "`size_file` need to be greater than 0"
          end

          @size_file = size_file
        end

        # True once the file has reached the configured size.
        def rotate?(file)
          file.size >= size_file
        end

        # Size can be checked at write time; no periodic check required.
        def needs_periodic?
          false
        end
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # encoding: utf-8
2
require "fileutils"
require "forwardable"
require "pathname"
require "thread"
5
+
6
module LogStash
  module Outputs
    class Swift
      # Wrap the actual file descriptor into a utility class.
      # It makes it more OOP and easier to reason with the paths.
      class TemporaryFile
        extend Forwardable

        def_delegators :@fd, :path, :write, :close, :fsync

        attr_reader :fd

        # key       - object key (a leading slash, if any, is stripped by #key)
        # fd        - open File (or gzip wrapper) backing this temporary file
        # temp_path - the UUID-named root directory sandboxing this file
        def initialize(key, fd, temp_path)
          @fd = fd
          @key = key
          @temp_path = temp_path
          @created_at = Time.now
        end

        # Creation time of this wrapper (not the on-disk ctime).
        def ctime
          @created_at
        end

        def temp_path
          @temp_path
        end

        # Current size in bytes.
        # Use the fd size to get the accurate result,
        # so we dont have to deal with fsync;
        # if the file is closed we will use File::size.
        def size
          begin
            @fd.size
          rescue IOError
            ::File.size(path)
          end
        end

        # Object key with any leading slash removed.
        def key
          @key.gsub(/^\//, "")
        end

        # Each temporary file is made inside a directory named with an UUID,
        # instead of deleting the file directly and having the risk of deleting other files
        # we delete the root of the UUID, using a UUID also remove the risk of deleting unwanted file, it acts as
        # a sandbox.
        def delete!
          # Force close, ignoring errors (e.g. already closed): the original
          # `@fd.close rescue IOError` was a misuse of the rescue modifier —
          # it silently caught any StandardError and evaluated to the
          # constant IOError. `rescue nil` keeps the swallow-all behavior
          # without the misleading form.
          @fd.close rescue nil
          FileUtils.rm_r(@temp_path, :secure => true)
        end

        def empty?
          size == 0
        end

        # Rebuild a TemporaryFile from a file left on disk by a crashed
        # process, laid out as <temporary_folder>/<uuid>/<key parts...>.
        def self.create_from_existing_file(file_path, temporary_folder)
          key_parts = Pathname.new(file_path).relative_path_from(temporary_folder).to_s.split(::File::SEPARATOR)

          TemporaryFile.new(key_parts.slice(1, key_parts.size).join("/"),
                            ::File.open(file_path, "r"),
                            ::File.join(temporary_folder, key_parts.slice(0, 1)))
        end
      end
    end
  end
end
@@ -0,0 +1,129 @@
1
+ # encoding: utf-8
2
+ require "socket"
3
+ require "securerandom"
4
+ require "fileutils"
5
+ require "zlib"
6
+ require "forwardable"
7
+
8
module LogStash
  module Outputs
    class Swift
      # Since the file can contain a dynamic part, we have to handle a more local structure to
      # allow a nice recovery from a crash.
      #
      # The local structure will look like this.
      #
      # <TEMPORARY_PATH>/<UUID>/<prefix>/ls.s3.localhost.%Y-%m-%dT%H.%m.tag_es_fb.part1.txt.gz
      #
      # Since the UUID should be fairly unique I can destroy the whole path when an upload is complete.
      # I do not have to mess around to check if the other directory have file in it before destroying them.
      class TemporaryFileFactory
        FILE_MODE = "a"
        GZIP_ENCODING = "gzip"
        GZIP_EXTENSION = "txt.gz"
        TXT_EXTENSION = "txt"
        STRFTIME = "%Y-%m-%dT%H.%M"

        attr_accessor :counter, :tags, :prefix, :encoding, :temporary_directory, :current

        def initialize(prefix, tags, encoding, temporary_directory)
          @counter = 0
          @prefix = prefix

          @tags = tags
          @encoding = encoding
          @temporary_directory = temporary_directory
          @lock = Mutex.new

          rotate!
        end

        # Atomically swap in a brand-new file and bump the part counter.
        def rotate!
          @lock.synchronize {
            @current = new_file
            increment_counter
            @current
          }
        end

        private

        def extension
          gzip? ? GZIP_EXTENSION : TXT_EXTENSION
        end

        def gzip?
          encoding == GZIP_ENCODING
        end

        def increment_counter
          @counter += 1
        end

        def current_time
          Time.now.strftime(STRFTIME)
        end

        # Build the file name:
        #   ls.s3.<uuid>.<time>[.tag_<tags joined by '.'>].part<counter>.<ext>
        # Fix: the base-name interpolation was broken ("#(unknown)...."),
        # leaving `filename` unused; restored to "#{filename}...".
        def generate_name
          filename = "ls.s3.#{SecureRandom.uuid}.#{current_time}"

          if tags.size > 0
            "#{filename}.tag_#{tags.join('.')}.part#{counter}.#{extension}"
          else
            "#{filename}.part#{counter}.#{extension}"
          end
        end

        # Create <temporary_directory>/<uuid>/<prefix>/<generated name> and
        # wrap it in a TemporaryFile sandboxed at the <uuid> directory.
        def new_file
          uuid = SecureRandom.uuid
          name = generate_name
          path = ::File.join(temporary_directory, uuid)
          key = ::File.join(prefix, name)

          FileUtils.mkdir_p(::File.join(path, prefix))

          io = if gzip?
                 # We have to use this wrapper because we cannot access the size of the
                 # file directly on the gzip writer.
                 IOWrappedGzip.new(::File.open(::File.join(path, key), FILE_MODE))
               else
                 ::File.open(::File.join(path, key), FILE_MODE)
               end

          TemporaryFile.new(key, io, path)
        end

        # Adapter exposing path/size/fsync on top of a GzipWriter so the
        # rotation policies can treat gzip and plain files the same way.
        class IOWrappedGzip
          extend Forwardable

          def_delegators :@gzip_writer, :write, :close
          attr_reader :file_io, :gzip_writer

          def initialize(file_io)
            @file_io = file_io
            @gzip_writer = Zlib::GzipWriter.open(file_io)
          end

          def path
            @gzip_writer.to_io.path
          end

          # Current compressed size on disk.
          def size
            if @gzip_writer.pos == 0
              # Ensure a zero file size is returned when nothing has
              # yet been written to the gzip file (flushing an empty
              # writer would emit the gzip header).
              0
            else
              @gzip_writer.flush
              @gzip_writer.to_io.size
            end
          end

          def fsync
            @gzip_writer.to_io.fsync
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # encoding: utf-8
2
module LogStash
  module Outputs
    class Swift
      # Rotation policy triggering once a non-empty temporary file becomes
      # older than a fixed duration.
      class TimeRotationPolicy
        attr_reader :time_file

        # time_file - maximum age in MINUTES (stored internally as seconds);
        # must be strictly positive, else LogStash::ConfigurationError.
        def initialize(time_file)
          if time_file <= 0
            # Fixed typo in the error message ("greather" -> "greater").
            raise LogStash::ConfigurationError, "`time_file` need to be greater than 0"
          end

          @time_file = time_file * 60
        end

        # True when the file has content and is older than the limit;
        # empty files are never rotated on time alone.
        def rotate?(file)
          file.size > 0 && (Time.now - file.ctime) >= time_file
        end

        # Age advances without writes, so a periodic check is required.
        def needs_periodic?
          true
        end
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # encoding: utf-8
2
+ require "logstash/util"
3
+
4
module LogStash
  module Outputs
    class Swift
      # Uploads finished temporary files to the Swift container on a bounded
      # worker pool, retrying transient failures forever.
      class Uploader
        TIME_BEFORE_RETRYING_SECONDS = 1

        # Bounded pool: a queue of 1 plus :caller_runs makes a saturated pool
        # apply back pressure to the pipeline thread instead of queueing.
        DEFAULT_THREADPOOL = Concurrent::ThreadPoolExecutor.new({
          :min_threads => 1,
          :max_threads => 8,
          :max_queue => 1,
          :fallback_policy => :caller_runs
        })

        attr_reader :container, :upload_options, :logger

        def initialize(container, logger, threadpool = DEFAULT_THREADPOOL)
          @container = container
          @workers_pool = threadpool
          @logger = logger
        end

        # Schedule an upload on the worker pool and return immediately.
        def upload_async(file, options = {})
          @workers_pool.post do
            LogStash::Util.set_thread_name("Swift output uploader, file: #{file.path}")
            upload(file, options)
          end
        end

        # Upload one file synchronously, then invoke options[:on_complete].
        # Fixes: removed leftover debug `puts` statements and an unused
        # `upload_options` local that shadowed the attr_reader.
        def upload(file, options = {})
          begin
            container.files.create(key: file.key, body: ::File.read(::File.expand_path(file.path)))
          rescue Errno::ENOENT => e
            logger.error("File doesn't exist! Unrecoverable error.", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
          rescue => e
            # When we get here the storage client has usually exhausted its
            # own retries. Wait and retry indefinitely.
            #
            # The thread might be stuck here, but that is better than losing
            # anything: it is either a transient error or something really bad.
            logger.error("Uploading failed, retrying.", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
            sleep TIME_BEFORE_RETRYING_SECONDS
            retry
          end

          options[:on_complete].call(file) unless options[:on_complete].nil?
        rescue => e
          logger.error("An error occurred in the `on_complete` uploader", :exception => e.class, :message => e.message, :path => file.path, :backtrace => e.backtrace)
          raise e # reraise it since we don't deal with it now
        end

        # Shut the pool down and block until in-flight uploads are done.
        def stop
          @workers_pool.shutdown
          @workers_pool.wait_for_termination(nil) # block until its done
        end
      end
    end
  end
end