logstash-core 2.2.4.snapshot1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of logstash-core might be problematic. Click here for more details.

Files changed (83):
  1. checksums.yaml +7 -0
  2. data/lib/logstash-core.rb +1 -0
  3. data/lib/logstash-core/logstash-core.rb +3 -0
  4. data/lib/logstash-core/version.rb +8 -0
  5. data/lib/logstash/agent.rb +391 -0
  6. data/lib/logstash/codecs/base.rb +50 -0
  7. data/lib/logstash/config/config_ast.rb +550 -0
  8. data/lib/logstash/config/cpu_core_strategy.rb +32 -0
  9. data/lib/logstash/config/defaults.rb +12 -0
  10. data/lib/logstash/config/file.rb +39 -0
  11. data/lib/logstash/config/grammar.rb +3503 -0
  12. data/lib/logstash/config/mixin.rb +518 -0
  13. data/lib/logstash/config/registry.rb +13 -0
  14. data/lib/logstash/environment.rb +98 -0
  15. data/lib/logstash/errors.rb +12 -0
  16. data/lib/logstash/filters/base.rb +205 -0
  17. data/lib/logstash/inputs/base.rb +116 -0
  18. data/lib/logstash/inputs/threadable.rb +18 -0
  19. data/lib/logstash/java_integration.rb +116 -0
  20. data/lib/logstash/json.rb +61 -0
  21. data/lib/logstash/logging.rb +91 -0
  22. data/lib/logstash/namespace.rb +13 -0
  23. data/lib/logstash/output_delegator.rb +172 -0
  24. data/lib/logstash/outputs/base.rb +91 -0
  25. data/lib/logstash/patches.rb +5 -0
  26. data/lib/logstash/patches/bugfix_jruby_2558.rb +51 -0
  27. data/lib/logstash/patches/cabin.rb +35 -0
  28. data/lib/logstash/patches/profile_require_calls.rb +47 -0
  29. data/lib/logstash/patches/rubygems.rb +38 -0
  30. data/lib/logstash/patches/stronger_openssl_defaults.rb +68 -0
  31. data/lib/logstash/pipeline.rb +499 -0
  32. data/lib/logstash/pipeline_reporter.rb +114 -0
  33. data/lib/logstash/plugin.rb +120 -0
  34. data/lib/logstash/program.rb +14 -0
  35. data/lib/logstash/runner.rb +124 -0
  36. data/lib/logstash/shutdown_watcher.rb +100 -0
  37. data/lib/logstash/util.rb +203 -0
  38. data/lib/logstash/util/buftok.rb +139 -0
  39. data/lib/logstash/util/charset.rb +35 -0
  40. data/lib/logstash/util/decorators.rb +52 -0
  41. data/lib/logstash/util/defaults_printer.rb +31 -0
  42. data/lib/logstash/util/filetools.rb +186 -0
  43. data/lib/logstash/util/java_version.rb +66 -0
  44. data/lib/logstash/util/password.rb +25 -0
  45. data/lib/logstash/util/plugin_version.rb +56 -0
  46. data/lib/logstash/util/prctl.rb +10 -0
  47. data/lib/logstash/util/retryable.rb +40 -0
  48. data/lib/logstash/util/socket_peer.rb +7 -0
  49. data/lib/logstash/util/unicode_trimmer.rb +81 -0
  50. data/lib/logstash/util/worker_threads_default_printer.rb +29 -0
  51. data/lib/logstash/util/wrapped_synchronous_queue.rb +41 -0
  52. data/lib/logstash/version.rb +14 -0
  53. data/locales/en.yml +204 -0
  54. data/logstash-core.gemspec +58 -0
  55. data/spec/conditionals_spec.rb +429 -0
  56. data/spec/logstash/agent_spec.rb +85 -0
  57. data/spec/logstash/config/config_ast_spec.rb +146 -0
  58. data/spec/logstash/config/cpu_core_strategy_spec.rb +123 -0
  59. data/spec/logstash/config/defaults_spec.rb +10 -0
  60. data/spec/logstash/config/mixin_spec.rb +158 -0
  61. data/spec/logstash/environment_spec.rb +56 -0
  62. data/spec/logstash/filters/base_spec.rb +251 -0
  63. data/spec/logstash/inputs/base_spec.rb +74 -0
  64. data/spec/logstash/java_integration_spec.rb +304 -0
  65. data/spec/logstash/json_spec.rb +96 -0
  66. data/spec/logstash/output_delegator_spec.rb +144 -0
  67. data/spec/logstash/outputs/base_spec.rb +40 -0
  68. data/spec/logstash/patches_spec.rb +90 -0
  69. data/spec/logstash/pipeline_reporter_spec.rb +85 -0
  70. data/spec/logstash/pipeline_spec.rb +455 -0
  71. data/spec/logstash/plugin_spec.rb +169 -0
  72. data/spec/logstash/runner_spec.rb +68 -0
  73. data/spec/logstash/shutdown_watcher_spec.rb +113 -0
  74. data/spec/logstash/util/buftok_spec.rb +31 -0
  75. data/spec/logstash/util/charset_spec.rb +74 -0
  76. data/spec/logstash/util/defaults_printer_spec.rb +50 -0
  77. data/spec/logstash/util/java_version_spec.rb +79 -0
  78. data/spec/logstash/util/plugin_version_spec.rb +64 -0
  79. data/spec/logstash/util/unicode_trimmer_spec.rb +55 -0
  80. data/spec/logstash/util/worker_threads_default_printer_spec.rb +45 -0
  81. data/spec/logstash/util/wrapped_synchronous_queue_spec.rb +28 -0
  82. data/spec/logstash/util_spec.rb +35 -0
  83. metadata +364 -0
# encoding: utf-8
require "logstash/event"
require "logstash/logging"
require "logstash/plugin"
require "logstash/namespace"
require "logstash/config/mixin"
require "logstash/util/wrapped_synchronous_queue"
require "concurrent/atomic/atomic_fixnum"

# Base class for all output plugins. Provides the common `config` options
# (codec, workers, obsolete type/tags filters) and the class-level
# threadsafety / workers-not-supported declarations that the
# OutputDelegator consults when deciding how many worker copies to run.
class LogStash::Outputs::Base < LogStash::Plugin
  include LogStash::Config::Mixin

  config_name "output"

  config :type, :validate => :string, :default => "", :obsolete => "You can achieve this same behavior with the new conditionals, like: `if [type] == \"sometype\" { %PLUGIN% { ... } }`."

  config :tags, :validate => :array, :default => [], :obsolete => "You can achieve similar behavior with the new conditionals, like: `if \"sometag\" in [tags] { %PLUGIN% { ... } }`"

  config :exclude_tags, :validate => :array, :default => [], :obsolete => "You can achieve similar behavior with the new conditionals, like: `if (\"sometag\" not in [tags]) { %PLUGIN% { ... } }`"

  # The codec used for output data. Output codecs are a convenient method for encoding your data before it leaves the output, without needing a separate filter in your Logstash pipeline.
  config :codec, :validate => :codec, :default => "plain"

  # The number of workers to use for this output.
  # Note that this setting may not be useful for all outputs.
  config :workers, :validate => :number, :default => 1

  # NOTE: the original listed :worker_plugins twice; the duplicate is removed.
  attr_reader :worker_plugins, :available_workers, :workers, :workers_not_supported

  # Mark this output class as safe to share across worker threads.
  # Implies that multiple workers are unnecessary (and unsupported).
  def self.declare_threadsafe!
    declare_workers_not_supported!
    @threadsafe = true
  end

  # @return [Boolean] true only if declare_threadsafe! was called on this class
  def self.threadsafe?
    @threadsafe == true
  end

  # Declare that this output cannot run with multiple workers.
  # @param message [String, nil] optional explanation shown to the user
  def self.declare_workers_not_supported!(message=nil)
    @workers_not_supported_message = message
    @workers_not_supported = true
  end

  def self.workers_not_supported_message
    @workers_not_supported_message
  end

  def self.workers_not_supported?
    !!@workers_not_supported
  end

  public
  # TODO: Remove this in the next major version after Logstash 2.x
  # Post 2.x it should raise an error and tell people to use the class level
  # declaration
  def workers_not_supported(message=nil)
    self.class.declare_workers_not_supported!(message)
  end

  public
  def initialize(params={})
    super
    config_init(@params)

    # If we're running with a single thread we must enforce single-threaded concurrency by default
    # Maybe in a future version we'll assume output plugins are threadsafe
    @single_worker_mutex = Mutex.new
  end

  public
  # Subclasses must implement plugin setup here (connections, buffers, ...).
  def register
    raise "#{self.class}#register must be overridden"
  end # def register

  public
  # Subclasses must implement per-event delivery here.
  def receive(event)
    raise "#{self.class}#receive must be overridden"
  end # def receive

  public
  # To be overriden in implementations
  def multi_receive(events)
    events.each {|event| receive(event) }
  end

  private
  def output?(event)
    # TODO: noop for now, remove this once we delete this call from all plugins
    true
  end # def output?
end # class LogStash::Outputs::Base
# encoding: utf-8
# Entry point for Logstash's monkeypatches: requiring this file applies
# all of the patches below. Each patch file guards itself (by platform,
# environment variable, or library version) so loading is always safe.
require "logstash/patches/bugfix_jruby_2558"
require "logstash/patches/cabin"
require "logstash/patches/profile_require_calls"
require "logstash/patches/stronger_openssl_defaults"
# encoding: utf-8
require "logstash/environment"

# Workaround for JRuby bug #2558: on Windows, strings returned by some
# Socket APIs come back in the wrong encoding. Force them to UTF-8.
if LogStash::Environment.windows? && LogStash::Environment.jruby?
  require "socket"
  # Mixin that wraps #peeraddr and re-encodes every String element of the
  # returned address array as UTF-8. Expects the including class to have
  # aliased the original method as :orig_peeraddr first.
  module JRubyBug2558SocketPeerAddrBugFix
    def peeraddr(*args)
      orig_peeraddr(*args).map do |v|
        case v
        when String
          v.force_encoding(Encoding::UTF_8)
        else
          v
        end
      end
    end
  end

  class << Socket
    # Bugfix for jruby #2558
    alias_method :orig_gethostname, :gethostname
    def gethostname
      return orig_gethostname.force_encoding(Encoding::UTF_8)
    end
  end

  class TCPSocket
    alias_method :orig_peeraddr, :peeraddr
    include JRubyBug2558SocketPeerAddrBugFix
  end
  class UDPSocket
    alias_method :orig_peeraddr, :peeraddr
    include JRubyBug2558SocketPeerAddrBugFix
  end
end

if LogStash::Environment.windows?
  # make sure all strings pulled out of ENV are UTF8
  class <<ENV
    alias_method :orig_getter, :[]
    def [](key)
      case value = orig_getter(key)
      when String
        # dup is necessary since force_encoding is destructive
        value.dup.force_encoding(Encoding::UTF_8)
      else
        value
      end
    end
  end
end
# encoding: utf-8
if ENV["PROFILE_BAD_LOG_CALLS"] || ($DEBUGLIST || []).include?("log")
  # Set PROFILE_BAD_LOG_CALLS=1 in your environment if you want
  # to track down logger calls that cause performance problems
  #
  # Related research here:
  # https://github.com/jordansissel/experiments/tree/master/ruby/logger-string-vs-block
  #
  # Basically, the following wastes tons of effort creating objects that are
  # never used if the log level hides the log:
  #
  #     logger.debug("something happened", :what => Happened)
  #
  # This is shown to be 4x faster:
  #
  #     logger.debug(...) if logger.debug?
  #
  # I originally intended to use RubyParser and SexpProcessor to
  # process all the logstash ruby code offline, but it was much
  # faster to write this monkeypatch to warn as things are called.
  require "cabin/mixins/logger"
  module Cabin::Mixins::Logger
    # For each log level (debug, info, warn, ...): alias the original
    # method away, then redefine it to emit a warning (with the caller's
    # location) whenever it is invoked while that level is disabled --
    # i.e. an "unconditional" call that built its arguments for nothing.
    LEVELS.keys.each do |level|
      m = "original_#{level}".to_sym
      predicate = "#{level}?".to_sym
      alias_method m, level
      define_method(level) do |*args|
        if !send(predicate)
          warn("Unconditional log call", :location => caller[0])
        end
        send(m, *args)
      end
    end
  end
end # PROFILE_BAD_LOG_CALLS
# encoding: utf-8
# Debug aid: when "require" is in $DEBUGLIST, wrap Kernel#require and
# Kernel#load to print the duration and origin of every require that
# actually loaded something (CSV: duration,path,source on stdout).
if ($DEBUGLIST || []).include?("require")
  ROOT = File.dirname(__FILE__)
  module Kernel
    alias_method :require_debug, :require

    def require(path)
      start = Time.now
      result = require_debug(path)
      duration = Time.now - start

      # Walk past rubygems' custom_require shim to find the real caller.
      origin = caller[1]
      if origin =~ /rubygems\/custom_require/
        origin = caller[3]
        if origin.nil?
          STDERR.puts "Unknown origin"
          STDERR.puts caller.join("\n")
        end
      end
      # Strip ":LINE:in `method'" so origin is just the file path.
      origin = origin.gsub(/:[0-9]+:in .*/, "") if origin

      # Only print require() calls that did actual work.
      # require() returns true on load, false if already loaded.
      if result
        source = caller[0]
        #p source.include?("/lib/polyglot.rb:63:in `require'") => source
        if source.include?("/lib/polyglot.rb:63:in `require'")
          source = caller[1]
        end

        #target = $LOADED_FEATURES.grep(/#{path}/).first
        #puts path
        #puts caller.map { |c| "  #{c}" }.join("\n")
        #fontsize = [10, duration * 48].max
        puts "#{duration},#{path},#{source}"
      end
      #puts caller.map { |c| " => #{c}" }.join("\n")
    end

    alias_method :load_debug, :load

    def load(path)
      puts "load(\"#{path}\")"
      return load_debug(path)
    end
  end
end
# encoding: utf-8
# monkey patch RubyGems to silence ffi warnings:
#
#   WARN: Unresolved specs during Gem::Specification.reset:
#         ffi (>= 0)
#   WARN: Clearing out unresolved specs.
#   Please report a bug if this causes problems.
#
# see https://github.com/elasticsearch/logstash/issues/2556 and https://github.com/rubygems/rubygems/issues/1070
#
# this code is from Rubygems v2.1.9 in JRuby 1.7.17. Per tickets this issue should be solved at JRuby >= 1.7.20.
#
# this method implementation works for Rubygems version 2.1.0 and up, verified up to 2.4.6
if ::Gem::Version.new(::Gem::VERSION) >= ::Gem::Version.new("2.1.0") && ::Gem::Version.new(::Gem::VERSION) < ::Gem::Version.new("2.5.0")
  class ::Gem::Specification
    # Identical to the stock implementation except the warning block is
    # skipped when the only unresolved spec is "ffi".
    def self.reset
      @@dirs = nil
      ::Gem.pre_reset_hooks.each { |hook| hook.call }
      @@all = nil
      @@stubs = nil
      _clear_load_cache
      unresolved = unresolved_deps
      unless unresolved.empty?
        # The "W" + "ARN" split keeps grep-for-WARN tooling from matching
        # this source line itself.
        unless (unresolved.size == 1 && unresolved["ffi"])
          w = "W" + "ARN"
          warn "#{w}: Unresolved specs during Gem::Specification.reset:"
          unresolved.values.each do |dep|
            warn "      #{dep}"
          end
          warn "#{w}: Clearing out unresolved specs."
          warn "Please report a bug if this causes problems."
        end
        unresolved.clear
      end
      ::Gem.post_reset_hooks.each { |hook| hook.call }
    end
  end
end
# encoding: utf-8
require "openssl"

# :nodoc:
class OpenSSL::SSL::SSLContext
  # Wrap SSLContext.new to apply stronger default settings.
  class << self
    alias_method :orig_new, :new
    def new(*args)
      c = orig_new(*args)

      # Neither MRI nor JRuby seem to actually invoke `SSLContext#set_params` by
      # default, which makes the default ciphers (and other settings) not
      # actually defaults. Oops!
      # To force this, and force our (hopefully more secure) defaults on
      # all things using openssl in Ruby, we will invoke set_params
      # on all new SSLContext objects.
      c.set_params
      c
    end
  end

  # This cipher selection comes from https://wiki.mozilla.org/Security/Server_Side_TLS
  MOZILLA_INTERMEDIATE_CIPHERS = "ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-DSS-AES128-GCM-SHA256:kEDH+AESGCM:ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA:ECDHE-ECDSA-AES256-SHA:DHE-RSA-AES128-SHA256:DHE-RSA-AES128-SHA:DHE-DSS-AES128-SHA256:DHE-RSA-AES256-SHA256:DHE-DSS-AES256-SHA:DHE-RSA-AES256-SHA:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA256:AES256-SHA256:AES128-SHA:AES256-SHA:AES:CAMELLIA:DES-CBC3-SHA:!aNULL:!eNULL:!EXPORT:!DES:!RC4:!MD5:!PSK:!aECDH:!EDH-DSS-DES-CBC3-SHA:!EDH-RSA-DES-CBC3-SHA:!KRB5-DES-CBC3-SHA"

  # Returns the value that should be used for the default SSLContext options
  #
  # This is a method instead of a constant because some constants (like
  # OpenSSL::SSL::OP_NO_COMPRESSION) may not be available in all Ruby
  # versions/platforms.
  def self.__default_options
    # ruby-core is refusing to patch ruby's default openssl settings to be more
    # secure, so let's fix that here. The next few lines setting options and
    # ciphers come from jmhodges' proposed patch
    ssloptions = OpenSSL::SSL::OP_ALL

    # TODO(sissel): JRuby doesn't have this. Maybe work on a fix?
    if defined?(OpenSSL::SSL::OP_DONT_INSERT_EMPTY_FRAGMENTS)
      ssloptions &= ~OpenSSL::SSL::OP_DONT_INSERT_EMPTY_FRAGMENTS
    end

    # TODO(sissel): JRuby doesn't have this. Maybe work on a fix?
    if defined?(OpenSSL::SSL::OP_NO_COMPRESSION)
      ssloptions |= OpenSSL::SSL::OP_NO_COMPRESSION
    end

    # Disable SSLv2 and SSLv3. They are insecure and highly discouraged.
    ssloptions |= OpenSSL::SSL::OP_NO_SSLv2 if defined?(OpenSSL::SSL::OP_NO_SSLv2)
    ssloptions |= OpenSSL::SSL::OP_NO_SSLv3 if defined?(OpenSSL::SSL::OP_NO_SSLv3)
    ssloptions
  end

  # Overwriting the DEFAULT_PARAMS const idea from here: https://www.ruby-lang.org/en/news/2014/10/27/changing-default-settings-of-ext-openssl/
  #
  # This monkeypatch doesn't enforce a `VERIFY_MODE` on the SSLContext,
  # SSLContext is used for both the client and the server implementation.
  # If we set the `verify_mode` to peer, the server won't accept any
  # connection, because it will try to verify the client certificate;
  # verification is a protocol detail implemented at the plugin level.
  #
  # For more details see: https://github.com/elastic/logstash/issues/3657
  remove_const(:DEFAULT_PARAMS) if const_defined?(:DEFAULT_PARAMS)
  DEFAULT_PARAMS = {
    :ssl_version => "TLS",
    :ciphers => MOZILLA_INTERMEDIATE_CIPHERS,
    :options => __default_options # Not a constant because it's computed at start-time.
  }
end
# encoding: utf-8
require "thread"
require "stud/interval"
require "concurrent"
require "logstash/namespace"
require "logstash/errors"
require "logstash/event"
require "logstash/config/file"
require "logstash/filters/base"
require "logstash/inputs/base"
require "logstash/outputs/base"
require "logstash/config/cpu_core_strategy"
require "logstash/util/defaults_printer"
require "logstash/shutdown_watcher"
require "logstash/util/wrapped_synchronous_queue"
require "logstash/pipeline_reporter"
require "logstash/output_delegator"

# The Pipeline compiles a logstash config string into input/filter/output
# plugin instances, then runs input threads feeding a synchronous queue
# consumed by worker threads that filter and output batches of events.
# Shutdown/flush are signaled in-band by pushing LogStash::SHUTDOWN /
# LogStash::FLUSH sentinel objects onto the queue.
module LogStash; class Pipeline
  attr_reader :inputs, :filters, :outputs, :worker_threads, :events_consumed, :events_filtered, :reporter, :pipeline_id, :logger

  DEFAULT_SETTINGS = {
    :default_pipeline_workers => LogStash::Config::CpuCoreStrategy.maximum,
    :pipeline_batch_size => 125,
    :pipeline_batch_delay => 5, # in milliseconds
    :flush_interval => 5, # in seconds
    :flush_timeout_interval => 60, # in seconds
    :debug_config => false
  }
  MAX_INFLIGHT_WARN_THRESHOLD = 10_000

  # @param config_str [String] logstash pipeline configuration text
  # @param settings [Hash] overrides merged over DEFAULT_SETTINGS
  # @raise [LogStash::ConfigurationError] when the config fails to parse
  def initialize(config_str, settings = {})
    @pipeline_id = settings[:pipeline_id] || self.object_id
    @logger = Cabin::Channel.get(LogStash)
    @settings = DEFAULT_SETTINGS.clone
    settings.each {|setting, value| configure(setting, value) }
    @reporter = LogStash::PipelineReporter.new(@logger, self)

    @inputs = nil
    @filters = nil
    @outputs = nil

    @worker_threads = []

    grammar = LogStashConfigParser.new
    @config = grammar.parse(config_str)
    if @config.nil?
      raise LogStash::ConfigurationError, grammar.failure_reason
    end
    # This will compile the config to ruby and evaluate the resulting code.
    # The code will initialize all the plugins and define the
    # filter and output methods.
    code = @config.compile
    # The config code is hard to represent as a log message...
    # So just print it.
    if @settings[:debug_config]
      @logger.debug? && @logger.debug("Compiled pipeline code:\n#{code}")
    end
    begin
      # eval defines filter_func/output_func and populates @inputs,
      # @filters, @outputs on this instance.
      eval(code)
    rescue => e
      raise
    end

    @input_queue = LogStash::Util::WrappedSynchronousQueue.new
    @events_filtered = Concurrent::AtomicFixnum.new(0)
    @events_consumed = Concurrent::AtomicFixnum.new(0)

    # We generally only want one thread at a time able to access pop/take/poll operations
    # from this queue. We also depend on this to be able to block consumers while we snapshot
    # in-flight buffers
    @input_queue_pop_mutex = Mutex.new
    @input_threads = []
    # @ready requires thread safety since it is typically polled from outside the pipeline thread
    @ready = Concurrent::AtomicBoolean.new(false)
    @running = Concurrent::AtomicBoolean.new(false)
    @flushing = Concurrent::AtomicReference.new(false)
  end # def initialize

  def ready?
    @ready.value
  end

  def configure(setting, value)
    @settings[setting] = value
  end

  # Compute a worker-thread count that is safe given the loaded filters:
  # non-threadsafe filters force a single worker unless the user
  # explicitly overrides with a thread count > 1.
  def safe_pipeline_worker_count
    default = DEFAULT_SETTINGS[:default_pipeline_workers]
    thread_count = @settings[:pipeline_workers] #override from args "-w 8" or config
    safe_filters, unsafe_filters = @filters.partition(&:threadsafe?)

    if unsafe_filters.any?
      plugins = unsafe_filters.collect { |f| f.class.config_name }
      case thread_count
      when nil
        # user did not specify a worker thread count
        # warn if the default is multiple

        if default > 1
          @logger.warn("Defaulting pipeline worker threads to 1 because there are some filters that might not work with multiple worker threads",
                       :count_was => default, :filters => plugins)
        end

        1 # can't allow the default value to propagate if there are unsafe filters
      when 0, 1
        1
      else
        @logger.warn("Warning: Manual override - there are filters that might not work with multiple worker threads",
                     :worker_threads => thread_count, :filters => plugins)
        thread_count # allow user to force this even if there are unsafe filters
      end
    else
      thread_count || default
    end
  end

  def filters?
    return @filters.any?
  end

  # Run the pipeline to completion: start workers and inputs, block until
  # inputs finish, then shut everything down. Returns 0 (exit code).
  def run
    LogStash::Util.set_thread_name("[#{pipeline_id}]-pipeline-manager")
    @logger.terminal(LogStash::Util::DefaultsPrinter.print(@settings))

    start_workers

    @logger.info("Pipeline started")
    @logger.terminal("Logstash startup completed")

    # Block until all inputs have stopped
    # Generally this happens if SIGINT is sent and `shutdown` is called from an external thread

    transition_to_running
    start_flusher # Launches a non-blocking thread for flush events
    wait_inputs
    transition_to_stopped

    @logger.info("Input plugins stopped! Will shutdown filter/output workers.")

    shutdown_flusher
    shutdown_workers

    @logger.info("Pipeline shutdown complete.")
    @logger.terminal("Logstash shutdown completed")

    # exit code
    return 0
  end # def run

  def transition_to_running
    @running.make_true
  end

  def transition_to_stopped
    @running.make_false
  end

  def running?
    @running.true?
  end

  def stopped?
    @running.false?
  end

  # Register all plugins, then spawn the worker threads.
  # Always flips @ready to true, even on failure, so shutdown can proceed.
  def start_workers
    @inflight_batches = {}

    @worker_threads.clear # In case we're restarting the pipeline
    begin
      start_inputs
      @outputs.each {|o| o.register }
      @filters.each {|f| f.register}

      pipeline_workers = safe_pipeline_worker_count
      batch_size = @settings[:pipeline_batch_size]
      batch_delay = @settings[:pipeline_batch_delay]
      max_inflight = batch_size * pipeline_workers
      @logger.info("Starting pipeline",
                   :id => self.pipeline_id,
                   :pipeline_workers => pipeline_workers,
                   :batch_size => batch_size,
                   :batch_delay => batch_delay,
                   :max_inflight => max_inflight)
      if max_inflight > MAX_INFLIGHT_WARN_THRESHOLD
        @logger.warn "CAUTION: Recommended inflight events max exceeded! Logstash will run with up to #{max_inflight} events in memory in your current configuration. If your message sizes are large this may cause instability with the default heap size. Please consider setting a non-standard heap size, changing the batch size (currently #{batch_size}), or changing the number of pipeline workers (currently #{pipeline_workers})"
      end

      pipeline_workers.times do |t|
        @worker_threads << Thread.new do
          LogStash::Util.set_thread_name("[#{pipeline_id}]>worker#{t}")
          worker_loop(batch_size, batch_delay)
        end
      end
    ensure
      # it is important to guarantee @ready to be true after the startup sequence has been completed
      # to potentially unblock the shutdown method which may be waiting on @ready to proceed
      @ready.make_true
    end
  end

  # Main body of what a worker thread does
  # Repeatedly takes batches off the queue, filters, then outputs them
  def worker_loop(batch_size, batch_delay)
    running = true

    while running
      # To understand the purpose behind this synchronize please read the body of take_batch
      input_batch, signal = @input_queue_pop_mutex.synchronize { take_batch(batch_size, batch_delay) }
      running = false if signal == LogStash::SHUTDOWN

      @events_consumed.increment(input_batch.size)

      filtered_batch = filter_batch(input_batch)

      if signal # Flush on SHUTDOWN or FLUSH
        flush_options = (signal == LogStash::SHUTDOWN) ? {:final => true} : {}
        flush_filters_to_batch(filtered_batch, flush_options)
      end

      @events_filtered.increment(filtered_batch.size)

      output_batch(filtered_batch)

      inflight_batches_synchronize { set_current_thread_inflight_batch(nil) }
    end
  end

  # Pull up to batch_size events off the input queue; blocks on the first
  # event, polls (with batch_delay) for the rest.
  # @return [Array(Array<Event>, false|SHUTDOWN|FLUSH)] batch plus any signal seen
  def take_batch(batch_size, batch_delay)
    batch = []
    # Since this is externally synchronized in `worker_loop` we can guarantee that an in-flight
    # batch is visible as a full batch, never a partial batch
    set_current_thread_inflight_batch(batch)

    signal = false
    batch_size.times do |t|
      event = (t == 0) ? @input_queue.take : @input_queue.poll(batch_delay)

      if event.nil?
        next
      elsif event == LogStash::SHUTDOWN || event == LogStash::FLUSH
        # We MUST break here. If a batch consumes two SHUTDOWN events
        # then another worker may have its SHUTDOWN 'stolen', thus blocking
        # the pipeline. We should stop doing work after flush as well.
        signal = event
        break
      else
        batch << event
      end
    end

    [batch, signal]
  end

  # Run filter_func over each event; drop cancelled results.
  def filter_batch(batch)
    batch.reduce([]) do |acc,e|
      if e.is_a?(LogStash::Event)
        filtered = filter_func(e)
        filtered.each {|fe| acc << fe unless fe.cancelled?}
      end
      acc
    end
  rescue Exception => e
    # Plugins authors should manage their own exceptions in the plugin code
    # but if an exception is raised up to the worker thread they are considered
    # fatal and logstash will not recover from this situation.
    #
    # Users need to check their configuration or see if there is a bug in the
    # plugin.
    @logger.error("Exception in pipelineworker, the pipeline stopped processing new events, please check your filter configuration and restart Logstash.",
                  "exception" => e, "backtrace" => e.backtrace)
    raise
  end

  # Take an array of events and send them to the correct output
  def output_batch(batch)
    # Build a mapping of { output_plugin => [events...]}
    outputs_events = batch.reduce(Hash.new { |h, k| h[k] = [] }) do |acc, event|
      # We ask the AST to tell us which outputs to send each event to
      # Then, we stick it in the correct bin

      # output_func should never return anything other than an Array but we have lots of legacy specs
      # that monkeypatch it and return nil. We can deprecate "|| []" after fixing these specs
      outputs_for_event = output_func(event) || []

      outputs_for_event.each { |output| acc[output] << event }
      acc
    end

    # Now that we have our output to event mapping we can just invoke each output
    # once with its list of events
    outputs_events.each { |output, events| output.multi_receive(events) }
  end

  def set_current_thread_inflight_batch(batch)
    @inflight_batches[Thread.current] = batch
  end

  # Yields @inflight_batches under the queue-pop mutex so a snapshot is
  # consistent with no batch being concurrently taken.
  def inflight_batches_synchronize
    @input_queue_pop_mutex.synchronize do
      yield(@inflight_batches)
    end
  end

  def wait_inputs
    @input_threads.each(&:join)
  end

  # Clone threadable inputs to reach their requested thread count, then
  # register every input and start one thread per input instance.
  def start_inputs
    moreinputs = []
    @inputs.each do |input|
      if input.threadable && input.threads > 1
        (input.threads - 1).times do |i|
          moreinputs << input.clone
        end
      end
    end
    @inputs += moreinputs

    @inputs.each do |input|
      input.register
      start_input(input)
    end
  end

  def start_input(plugin)
    @input_threads << Thread.new { inputworker(plugin) }
  end

  # Body of an input thread: run the plugin, restart it (after 1s) on
  # unexpected errors, ignore errors raised during shutdown.
  def inputworker(plugin)
    LogStash::Util::set_thread_name("[#{pipeline_id}]<#{plugin.class.config_name}")
    begin
      plugin.run(@input_queue)
    rescue => e
      if plugin.stop?
        @logger.debug("Input plugin raised exception during shutdown, ignoring it.",
                      :plugin => plugin.class.config_name, :exception => e,
                      :backtrace => e.backtrace)
        return
      end

      # otherwise, report error and restart
      if @logger.debug?
        @logger.error(I18n.t("logstash.pipeline.worker-error-debug",
                             :plugin => plugin.inspect, :error => e.to_s,
                             :exception => e.class,
                             :stacktrace => e.backtrace.join("\n")))
      else
        @logger.error(I18n.t("logstash.pipeline.worker-error",
                             :plugin => plugin.inspect, :error => e))
      end

      # Assuming the failure that caused this exception is transient,
      # let's sleep for a bit and execute #run again
      sleep(1)
      retry
    ensure
      plugin.do_close
    end
  end # def inputworker

  # initiate the pipeline shutdown sequence
  # this method is intended to be called from outside the pipeline thread
  # @param before_stop [Proc] code block called before performing stop operation on input plugins
  def shutdown(&before_stop)
    # shutdown can only start once the pipeline has completed its startup.
    # avoid potential race condition between the startup sequence and this
    # shutdown method which can be called from another thread at any time
    sleep(0.1) while !ready?

    # TODO: should we also check against calling shutdown multiple times concurrently?

    before_stop.call if block_given?

    @logger.info "Closing inputs"
    @inputs.each(&:do_stop)
    @logger.info "Closed inputs"
  end # def shutdown

  # After `shutdown` is called from an external thread this is called from the main thread to
  # tell the worker threads to stop and then block until they've fully stopped
  # This also stops all filter and output plugins
  def shutdown_workers
    # Each worker thread will receive this exactly once!
    @worker_threads.each do |t|
      @logger.debug("Pushing shutdown", :thread => t)
      @input_queue.push(LogStash::SHUTDOWN)
    end

    @worker_threads.each do |t|
      @logger.debug("Shutdown waiting for worker thread #{t}")
      t.join
    end

    @filters.each(&:do_close)
    @outputs.each(&:do_close)
  end

  # Instantiate a plugin by type/name; outputs are wrapped in an
  # OutputDelegator which manages per-output worker copies.
  def plugin(plugin_type, name, *args)
    args << {} if args.empty?

    klass = LogStash::Plugin.lookup(plugin_type, name)

    if plugin_type == "output"
      LogStash::OutputDelegator.new(@logger, klass, default_output_workers, *args)
    else
      klass.new(*args)
    end
  end

  def default_output_workers
    @settings[:pipeline_workers] || @settings[:default_pipeline_workers]
  end

  # for backward compatibility in devutils for the rspec helpers, this method is not used
  # in the pipeline anymore.
  def filter(event, &block)
    # filter_func returns all filtered events, including cancelled ones
    filter_func(event).each { |e| block.call(e) }
  end


  # perform filters flush and yield flushed events to the passed block
  # @param options [Hash]
  # @option options [Boolean] :final => true to signal a final shutdown flush
  def flush_filters(options = {}, &block)
    flushers = options[:final] ? @shutdown_flushers : @periodic_flushers

    flushers.each do |flusher|
      flusher.call(options, &block)
    end
  end

  # Start the background thread that pushes a FLUSH signal roughly every
  # 5 seconds until the pipeline stops.
  def start_flusher
    # Invariant to help detect improper initialization
    raise "Attempted to start flusher on a stopped pipeline!" if stopped?

    @flusher_thread = Thread.new do
      while Stud.stoppable_sleep(5, 0.1) { stopped? }
        flush
        break if stopped?
      end
    end
  end

  def shutdown_flusher
    @flusher_thread.join
  end

  # Push a FLUSH signal unless a flush is already in progress
  # (@flushing is reset in flush_filters_to_batch).
  def flush
    if @flushing.compare_and_set(false, true)
      @logger.debug? && @logger.debug("Pushing flush onto pipeline")
      @input_queue.push(LogStash::FLUSH)
    end
  end

  # perform filters flush into the output queue
  # @param options [Hash]
  # @option options [Boolean] :final => true to signal a final shutdown flush
  def flush_filters_to_batch(batch, options = {})
    flush_filters(options) do |event|
      unless event.cancelled?
        @logger.debug? and @logger.debug("Pushing flushed events", :event => event)
        batch << event
      end
    end

    @flushing.set(false)
  end # flush_filters_to_output!

  def plugin_threads_info
    input_threads = @input_threads.select {|t| t.alive? }
    worker_threads = @worker_threads.select {|t| t.alive? }
    (input_threads + worker_threads).map {|t| LogStash::Util.thread_info(t) }
  end

  def stalling_threads_info
    plugin_threads_info
      .reject {|t| t["blocked_on"] } # known benign blocking statuses
      .each {|t| t.delete("backtrace") }
      .each {|t| t.delete("blocked_on") }
      .each {|t| t.delete("status") }
  end

  # Sometimes we log stuff that will dump the pipeline which may contain
  # sensitive information (like the raw syntax tree which can contain passwords)
  # We want to hide most of what's in here
  def inspect
    {
      :pipeline_id => @pipeline_id,
      :settings => @settings.inspect,
      :ready => @ready,
      :running => @running,
      :flushing => @flushing
    }
  end

end end