logstash-core 2.2.4.snapshot1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of logstash-core might be problematic. Click here for more details.

Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/lib/logstash-core.rb +1 -0
  3. data/lib/logstash-core/logstash-core.rb +3 -0
  4. data/lib/logstash-core/version.rb +8 -0
  5. data/lib/logstash/agent.rb +391 -0
  6. data/lib/logstash/codecs/base.rb +50 -0
  7. data/lib/logstash/config/config_ast.rb +550 -0
  8. data/lib/logstash/config/cpu_core_strategy.rb +32 -0
  9. data/lib/logstash/config/defaults.rb +12 -0
  10. data/lib/logstash/config/file.rb +39 -0
  11. data/lib/logstash/config/grammar.rb +3503 -0
  12. data/lib/logstash/config/mixin.rb +518 -0
  13. data/lib/logstash/config/registry.rb +13 -0
  14. data/lib/logstash/environment.rb +98 -0
  15. data/lib/logstash/errors.rb +12 -0
  16. data/lib/logstash/filters/base.rb +205 -0
  17. data/lib/logstash/inputs/base.rb +116 -0
  18. data/lib/logstash/inputs/threadable.rb +18 -0
  19. data/lib/logstash/java_integration.rb +116 -0
  20. data/lib/logstash/json.rb +61 -0
  21. data/lib/logstash/logging.rb +91 -0
  22. data/lib/logstash/namespace.rb +13 -0
  23. data/lib/logstash/output_delegator.rb +172 -0
  24. data/lib/logstash/outputs/base.rb +91 -0
  25. data/lib/logstash/patches.rb +5 -0
  26. data/lib/logstash/patches/bugfix_jruby_2558.rb +51 -0
  27. data/lib/logstash/patches/cabin.rb +35 -0
  28. data/lib/logstash/patches/profile_require_calls.rb +47 -0
  29. data/lib/logstash/patches/rubygems.rb +38 -0
  30. data/lib/logstash/patches/stronger_openssl_defaults.rb +68 -0
  31. data/lib/logstash/pipeline.rb +499 -0
  32. data/lib/logstash/pipeline_reporter.rb +114 -0
  33. data/lib/logstash/plugin.rb +120 -0
  34. data/lib/logstash/program.rb +14 -0
  35. data/lib/logstash/runner.rb +124 -0
  36. data/lib/logstash/shutdown_watcher.rb +100 -0
  37. data/lib/logstash/util.rb +203 -0
  38. data/lib/logstash/util/buftok.rb +139 -0
  39. data/lib/logstash/util/charset.rb +35 -0
  40. data/lib/logstash/util/decorators.rb +52 -0
  41. data/lib/logstash/util/defaults_printer.rb +31 -0
  42. data/lib/logstash/util/filetools.rb +186 -0
  43. data/lib/logstash/util/java_version.rb +66 -0
  44. data/lib/logstash/util/password.rb +25 -0
  45. data/lib/logstash/util/plugin_version.rb +56 -0
  46. data/lib/logstash/util/prctl.rb +10 -0
  47. data/lib/logstash/util/retryable.rb +40 -0
  48. data/lib/logstash/util/socket_peer.rb +7 -0
  49. data/lib/logstash/util/unicode_trimmer.rb +81 -0
  50. data/lib/logstash/util/worker_threads_default_printer.rb +29 -0
  51. data/lib/logstash/util/wrapped_synchronous_queue.rb +41 -0
  52. data/lib/logstash/version.rb +14 -0
  53. data/locales/en.yml +204 -0
  54. data/logstash-core.gemspec +58 -0
  55. data/spec/conditionals_spec.rb +429 -0
  56. data/spec/logstash/agent_spec.rb +85 -0
  57. data/spec/logstash/config/config_ast_spec.rb +146 -0
  58. data/spec/logstash/config/cpu_core_strategy_spec.rb +123 -0
  59. data/spec/logstash/config/defaults_spec.rb +10 -0
  60. data/spec/logstash/config/mixin_spec.rb +158 -0
  61. data/spec/logstash/environment_spec.rb +56 -0
  62. data/spec/logstash/filters/base_spec.rb +251 -0
  63. data/spec/logstash/inputs/base_spec.rb +74 -0
  64. data/spec/logstash/java_integration_spec.rb +304 -0
  65. data/spec/logstash/json_spec.rb +96 -0
  66. data/spec/logstash/output_delegator_spec.rb +144 -0
  67. data/spec/logstash/outputs/base_spec.rb +40 -0
  68. data/spec/logstash/patches_spec.rb +90 -0
  69. data/spec/logstash/pipeline_reporter_spec.rb +85 -0
  70. data/spec/logstash/pipeline_spec.rb +455 -0
  71. data/spec/logstash/plugin_spec.rb +169 -0
  72. data/spec/logstash/runner_spec.rb +68 -0
  73. data/spec/logstash/shutdown_watcher_spec.rb +113 -0
  74. data/spec/logstash/util/buftok_spec.rb +31 -0
  75. data/spec/logstash/util/charset_spec.rb +74 -0
  76. data/spec/logstash/util/defaults_printer_spec.rb +50 -0
  77. data/spec/logstash/util/java_version_spec.rb +79 -0
  78. data/spec/logstash/util/plugin_version_spec.rb +64 -0
  79. data/spec/logstash/util/unicode_trimmer_spec.rb +55 -0
  80. data/spec/logstash/util/worker_threads_default_printer_spec.rb +45 -0
  81. data/spec/logstash/util/wrapped_synchronous_queue_spec.rb +28 -0
  82. data/spec/logstash/util_spec.rb +35 -0
  83. metadata +364 -0
# encoding: utf-8
require "logstash/event"
require "logstash/logging"
require "logstash/plugin"
require "logstash/namespace"
require "logstash/config/mixin"
require "logstash/util/wrapped_synchronous_queue"
require "concurrent/atomic/atomic_fixnum"

# Base class for all output plugins. Concrete outputs subclass this,
# implement #register for one-time setup, and override #receive (one event
# at a time) and/or #multi_receive (a batch of events).
class LogStash::Outputs::Base < LogStash::Plugin
  include LogStash::Config::Mixin

  config_name "output"

  config :type, :validate => :string, :default => "", :obsolete => "You can achieve this same behavior with the new conditionals, like: `if [type] == \"sometype\" { %PLUGIN% { ... } }`."

  config :tags, :validate => :array, :default => [], :obsolete => "You can achieve similar behavior with the new conditionals, like: `if \"sometag\" in [tags] { %PLUGIN% { ... } }`"

  config :exclude_tags, :validate => :array, :default => [], :obsolete => "You can achieve similar behavior with the new conditionals, like: `if (\"sometag\" not in [tags]) { %PLUGIN% { ... } }`"

  # The codec used for output data. Output codecs are a convenient method for encoding your data before it leaves the output, without needing a separate filter in your Logstash pipeline.
  config :codec, :validate => :codec, :default => "plain"

  # The number of workers to use for this output.
  # Note that this setting may not be useful for all outputs.
  config :workers, :validate => :number, :default => 1

  # NOTE: the original listed :worker_plugins twice; the duplicate reader
  # was harmless but misleading and has been removed.
  attr_reader :worker_plugins, :available_workers, :workers, :workers_not_supported

  # Declare that instances of this output class are safe to share across
  # worker threads. Threadsafe outputs do not use the multi-worker scheme,
  # so workers are also marked as unsupported.
  def self.declare_threadsafe!
    declare_workers_not_supported!
    @threadsafe = true
  end

  # True only when declare_threadsafe! has been called on this class.
  def self.threadsafe?
    @threadsafe == true
  end

  # Declare that this output cannot run with multiple workers. An optional
  # message explaining why is kept for later reporting.
  def self.declare_workers_not_supported!(message=nil)
    @workers_not_supported_message = message
    @workers_not_supported = true
  end

  def self.workers_not_supported_message
    @workers_not_supported_message
  end

  def self.workers_not_supported?
    !!@workers_not_supported
  end

  public
  # TODO: Remove this in the next major version after Logstash 2.x
  # Post 2.x it should raise an error and tell people to use the class level
  # declaration
  def workers_not_supported(message=nil)
    self.class.declare_workers_not_supported!(message)
  end

  public
  def initialize(params={})
    super
    config_init(@params)

    # If we're running with a single thread we must enforce single-threaded concurrency by default
    # Maybe in a future version we'll assume output plugins are threadsafe
    @single_worker_mutex = Mutex.new
  end

  public
  # One-time setup hook; subclasses MUST override this.
  def register
    # Fixed typo in the error message: "overidden" -> "overridden".
    raise "#{self.class}#register must be overridden"
  end # def register

  public
  # Handle a single event; subclasses MUST override this (or #multi_receive).
  def receive(event)
    raise "#{self.class}#receive must be overridden"
  end # def receive

  public
  # To be overriden in implementations
  def multi_receive(events)
    events.each {|event| receive(event) }
  end

  private
  def output?(event)
    # TODO: noop for now, remove this once we delete this call from all plugins
    true
  end # def output?
end # class LogStash::Outputs::Base
# encoding: utf-8

# Load every runtime monkeypatch shipped with logstash-core.
%w(
  logstash/patches/bugfix_jruby_2558
  logstash/patches/cabin
  logstash/patches/profile_require_calls
  logstash/patches/stronger_openssl_defaults
).each { |patch| require patch }
# encoding: utf-8
require "logstash/environment"

# Workaround for JRuby bug #2558: on Windows some socket APIs return
# strings with the wrong encoding. The patches below force those strings
# to UTF-8. Applied only when running under JRuby on Windows.
if LogStash::Environment.windows? && LogStash::Environment.jruby?
  require "socket"

  # Wraps #peeraddr so every String element of the returned address array
  # is re-encoded as UTF-8. Including classes must alias their original
  # #peeraddr to #orig_peeraddr BEFORE including this module.
  module JRubyBug2558SocketPeerAddrBugFix
    def peeraddr(*args)
      orig_peeraddr(*args).map do |v|
        case v
        when String
          v.force_encoding(Encoding::UTF_8)
        else
          v
        end
      end
    end
  end

  class << Socket
    # Bugfix for jruby #2558: force the returned hostname to UTF-8.
    alias_method :orig_gethostname, :gethostname
    def gethostname
      return orig_gethostname.force_encoding(Encoding::UTF_8)
    end
  end

  # The alias must come before the include so orig_peeraddr resolves to the
  # original implementation when the mixin's #peeraddr calls it.
  class TCPSocket
    alias_method :orig_peeraddr, :peeraddr
    include JRubyBug2558SocketPeerAddrBugFix
  end
  class UDPSocket
    alias_method :orig_peeraddr, :peeraddr
    include JRubyBug2558SocketPeerAddrBugFix
  end
end

if LogStash::Environment.windows?
  # make sure all strings pulled out of ENV are UTF8
  class <<ENV
    alias_method :orig_getter, :[]
    def [](key)
      case value = orig_getter(key)
      when String
        # dup is necessary since force_encoding is destructive
        value.dup.force_encoding(Encoding::UTF_8)
      else
        value
      end
    end
  end
end
# encoding: utf-8
if ENV["PROFILE_BAD_LOG_CALLS"] || ($DEBUGLIST || []).include?("log")
  # Set PROFILE_BAD_LOG_CALLS=1 in your environment if you want
  # to track down logger calls that cause performance problems
  #
  # Related research here:
  # https://github.com/jordansissel/experiments/tree/master/ruby/logger-string-vs-block
  #
  # Basically, the following wastes tons of effort creating objects that are
  # never used if the log level hides the log:
  #
  #   logger.debug("something happened", :what => Happened)
  #
  # This is shown to be 4x faster:
  #
  #   logger.debug(...) if logger.debug?
  #
  # I originally intended to use RubyParser and SexpProcessor to
  # process all the logstash ruby code offline, but it was much
  # faster to write this monkeypatch to warn as things are called.
  require "cabin/mixins/logger"
  module Cabin::Mixins::Logger
    # For every log level method (debug, info, ...) keep the original under
    # original_<level>, then redefine <level> to warn (with the caller's
    # location) whenever the call would be discarded by the current level,
    # i.e. whenever the <level>? predicate is false, before delegating.
    LEVELS.keys.each do |level|
      m = "original_#{level}".to_sym
      predicate = "#{level}?".to_sym
      alias_method m, level
      define_method(level) do |*args|
        if !send(predicate)
          warn("Unconditional log call", :location => caller[0])
        end
        send(m, *args)
      end
    end
  end
end # PROFILE_BAD_LOG_CALLS
# encoding: utf-8
# Debug instrumentation, active only when "require" appears in $DEBUGLIST:
# wraps Kernel#require and Kernel#load to print timing and origin
# information for every require that actually loaded a file. Output is
# CSV-ish: "duration,path,source".
if ($DEBUGLIST || []).include?("require")
  ROOT = File.dirname(__FILE__)
  module Kernel
    alias_method :require_debug, :require

    def require(path)
      start = Time.now
      result = require_debug(path)
      duration = Time.now - start

      # Walk past rubygems' custom_require wrapper so we report the real
      # call site rather than the rubygems shim frame.
      origin = caller[1]
      if origin =~ /rubygems\/custom_require/
        origin = caller[3]
        if origin.nil?
          STDERR.puts "Unknown origin"
          STDERR.puts caller.join("\n")
        end
      end
      # Strip ":<line>:in `...'" so origins group by file.
      origin = origin.gsub(/:[0-9]+:in .*/, "") if origin

      # Only print require() calls that did actual work.
      # require() returns true on load, false if already loaded.
      if result
        source = caller[0]
        #p source.include?("/lib/polyglot.rb:63:in `require'") => source
        if source.include?("/lib/polyglot.rb:63:in `require'")
          source = caller[1]
        end

        #target = $LOADED_FEATURES.grep(/#{path}/).first
        #puts path
        #puts caller.map { |c| "  #{c}" }.join("\n")
        #fontsize = [10, duration * 48].max
        puts "#{duration},#{path},#{source}"
      end
      #puts caller.map { |c| " => #{c}" }.join("\n")
    end

    alias_method :load_debug, :load

    def load(path)
      puts "load(\"#{path}\")"
      return load_debug(path)
    end
  end
end
# encoding: utf-8
# monkey patch RubyGems to silence ffi warnings:
#
#   WARN: Unresolved specs during Gem::Specification.reset:
#         ffi (>= 0)
#   WARN: Clearing out unresolved specs.
#   Please report a bug if this causes problems.
#
# see https://github.com/elasticsearch/logstash/issues/2556 and https://github.com/rubygems/rubygems/issues/1070
#
# this code is from Rubygems v2.1.9 in JRuby 1.7.17. Per tickets this issue should be solved at JRuby >= 1.7.20.
#
# this method implementation works for Rubygems version 2.1.0 and up, verified up to 2.4.6
if ::Gem::Version.new(::Gem::VERSION) >= ::Gem::Version.new("2.1.0") && ::Gem::Version.new(::Gem::VERSION) < ::Gem::Version.new("2.5.0")
  class ::Gem::Specification
    # Verbatim copy of Rubygems' Specification.reset except that the warning
    # is suppressed when ffi is the only unresolved dependency.
    def self.reset
      @@dirs = nil
      ::Gem.pre_reset_hooks.each { |hook| hook.call }
      @@all = nil
      @@stubs = nil
      _clear_load_cache
      unresolved = unresolved_deps
      unless unresolved.empty?
        unless (unresolved.size == 1 && unresolved["ffi"])
          # "WARN" is built from two pieces, presumably so tools grepping
          # for the literal warning text don't match this source file —
          # TODO confirm.
          w = "W" + "ARN"
          warn "#{w}: Unresolved specs during Gem::Specification.reset:"
          unresolved.values.each do |dep|
            warn "      #{dep}"
          end
          warn "#{w}: Clearing out unresolved specs."
          warn "Please report a bug if this causes problems."
        end
        unresolved.clear
      end
      ::Gem.post_reset_hooks.each { |hook| hook.call }
    end
  end
end
# encoding: utf-8
require "openssl"

# :nodoc:
class OpenSSL::SSL::SSLContext
  # Wrap SSLContext.new to a stronger default settings.
  class << self
    alias_method :orig_new, :new
    def new(*args)
      c = orig_new(*args)

      # MRI nor JRuby seem to actually invoke `SSLContext#set_params` by
      # default, which makes the default ciphers (and other settings) not
      # actually defaults. Oops!
      # To force this, and force our (hopefully more secure) defaults on
      # all things using openssl in Ruby, we will invoke set_params
      # on all new SSLContext objects.
      c.set_params
      c
    end
  end

  # This cipher selection comes from https://wiki.mozilla.org/Security/Server_Side_TLS
  MOZILLA_INTERMEDIATE_CIPHERS = "ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-DSS-AES128-GCM-SHA256:kEDH+AESGCM:ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA:ECDHE-ECDSA-AES256-SHA:DHE-RSA-AES128-SHA256:DHE-RSA-AES128-SHA:DHE-DSS-AES128-SHA256:DHE-RSA-AES256-SHA256:DHE-DSS-AES256-SHA:DHE-RSA-AES256-SHA:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA256:AES256-SHA256:AES128-SHA:AES256-SHA:AES:CAMELLIA:DES-CBC3-SHA:!aNULL:!eNULL:!EXPORT:!DES:!RC4:!MD5:!PSK:!aECDH:!EDH-DSS-DES-CBC3-SHA:!EDH-RSA-DES-CBC3-SHA:!KRB5-DES-CBC3-SHA"

  # Returns the value that should be used for the default SSLContext options
  #
  # This is a method instead of a constant because some constants (like
  # OpenSSL::SSL::OP_NO_COMPRESSION) may not be available in all Ruby
  # versions/platforms.
  def self.__default_options
    # ruby-core is refusing to patch ruby's default openssl settings to be more
    # secure, so let's fix that here. The next few lines setting options and
    # ciphers come from jmhodges' proposed patch
    ssloptions = OpenSSL::SSL::OP_ALL

    # TODO(sissel): JRuby doesn't have this. Maybe work on a fix?
    if defined?(OpenSSL::SSL::OP_DONT_INSERT_EMPTY_FRAGMENTS)
      ssloptions &= ~OpenSSL::SSL::OP_DONT_INSERT_EMPTY_FRAGMENTS
    end

    # TODO(sissel): JRuby doesn't have this. Maybe work on a fix?
    if defined?(OpenSSL::SSL::OP_NO_COMPRESSION)
      ssloptions |= OpenSSL::SSL::OP_NO_COMPRESSION
    end

    # Disable SSLv2 and SSLv3. They are insecure and highly discouraged.
    ssloptions |= OpenSSL::SSL::OP_NO_SSLv2 if defined?(OpenSSL::SSL::OP_NO_SSLv2)
    ssloptions |= OpenSSL::SSL::OP_NO_SSLv3 if defined?(OpenSSL::SSL::OP_NO_SSLv3)
    ssloptions
  end

  # Overwriting the DEFAULT_PARAMS const idea from here: https://www.ruby-lang.org/en/news/2014/10/27/changing-default-settings-of-ext-openssl/
  #
  # This monkeypatch doesn't enforce a `VERIFY_MODE` on the SSLContext,
  # SSLContext are both used for the client and the server implementation,
  # If set the `verify_mode` to peer the server wont accept any connection,
  # because it will try to verify the client certificate, this is a protocol
  # details implemented at the plugin level.
  #
  # For more details see: https://github.com/elastic/logstash/issues/3657
  remove_const(:DEFAULT_PARAMS) if const_defined?(:DEFAULT_PARAMS)
  DEFAULT_PARAMS = {
    :ssl_version => "TLS",
    :ciphers => MOZILLA_INTERMEDIATE_CIPHERS,
    :options => __default_options # Not a constant because it's computed at start-time.
  }
end
# encoding: utf-8
require "thread"
require "stud/interval"
require "concurrent"
require "logstash/namespace"
require "logstash/errors"
require "logstash/event"
require "logstash/config/file"
require "logstash/filters/base"
require "logstash/inputs/base"
require "logstash/outputs/base"
require "logstash/config/cpu_core_strategy"
require "logstash/util/defaults_printer"
require "logstash/shutdown_watcher"
require "logstash/util/wrapped_synchronous_queue"
require "logstash/pipeline_reporter"
require "logstash/output_delegator"

# A Pipeline wires together the inputs, filters and outputs compiled from a
# logstash config string: input threads push events onto a synchronous
# queue, and worker threads take batches off of it, filter them and hand
# them to the outputs.
module LogStash; class Pipeline
  attr_reader :inputs, :filters, :outputs, :worker_threads, :events_consumed, :events_filtered, :reporter, :pipeline_id, :logger

  DEFAULT_SETTINGS = {
    :default_pipeline_workers => LogStash::Config::CpuCoreStrategy.maximum,
    :pipeline_batch_size => 125,
    :pipeline_batch_delay => 5, # in milliseconds
    :flush_interval => 5, # in seconds
    :flush_timeout_interval => 60, # in seconds
    :debug_config => false
  }
  # Warn when batch_size * worker count would exceed this many in-memory events.
  MAX_INFLIGHT_WARN_THRESHOLD = 10_000

  # @param config_str [String] the pipeline configuration source text
  # @param settings [Hash] overrides for DEFAULT_SETTINGS
  # @raise [LogStash::ConfigurationError] when the config cannot be parsed
  def initialize(config_str, settings = {})
    @pipeline_id = settings[:pipeline_id] || self.object_id
    @logger = Cabin::Channel.get(LogStash)
    @settings = DEFAULT_SETTINGS.clone
    settings.each {|setting, value| configure(setting, value) }
    @reporter = LogStash::PipelineReporter.new(@logger, self)

    @inputs = nil
    @filters = nil
    @outputs = nil

    @worker_threads = []

    grammar = LogStashConfigParser.new
    @config = grammar.parse(config_str)
    if @config.nil?
      raise LogStash::ConfigurationError, grammar.failure_reason
    end
    # This will compile the config to ruby and evaluate the resulting code.
    # The code will initialize all the plugins and define the
    # filter and output methods.
    code = @config.compile
    # The config code is hard to represent as a log message...
    # So just print it.
    if @settings[:debug_config]
      @logger.debug? && @logger.debug("Compiled pipeline code:\n#{code}")
    end
    begin
      # eval'ing the compiled config defines filter_func/output_func and
      # assigns @inputs/@filters/@outputs on this instance.
      eval(code)
    rescue => e
      raise
    end

    @input_queue = LogStash::Util::WrappedSynchronousQueue.new
    @events_filtered = Concurrent::AtomicFixnum.new(0)
    @events_consumed = Concurrent::AtomicFixnum.new(0)

    # We generally only want one thread at a time able to access pop/take/poll operations
    # from this queue. We also depend on this to be able to block consumers while we snapshot
    # in-flight buffers
    @input_queue_pop_mutex = Mutex.new
    @input_threads = []
    # @ready requires thread safety since it is typically polled from outside the pipeline thread
    @ready = Concurrent::AtomicBoolean.new(false)
    @running = Concurrent::AtomicBoolean.new(false)
    @flushing = Concurrent::AtomicReference.new(false)
  end # def initialize

  # True once start_workers has completed its startup sequence.
  def ready?
    @ready.value
  end

  # Set a single pipeline setting (see DEFAULT_SETTINGS for known keys).
  def configure(setting, value)
    @settings[setting] = value
  end

  # Compute a worker thread count that is safe given the configured filters:
  # forces 1 when any filter is not threadsafe unless the user explicitly
  # overrides (in which case the override wins, with a warning).
  def safe_pipeline_worker_count
    default = DEFAULT_SETTINGS[:default_pipeline_workers]
    thread_count = @settings[:pipeline_workers] #override from args "-w 8" or config
    safe_filters, unsafe_filters = @filters.partition(&:threadsafe?)

    if unsafe_filters.any?
      plugins = unsafe_filters.collect { |f| f.class.config_name }
      case thread_count
      when nil
        # user did not specify a worker thread count
        # warn if the default is multiple

        if default > 1
          @logger.warn("Defaulting pipeline worker threads to 1 because there are some filters that might not work with multiple worker threads",
                       :count_was => default, :filters => plugins)
        end

        1 # can't allow the default value to propagate if there are unsafe filters
      when 0, 1
        1
      else
        @logger.warn("Warning: Manual override - there are filters that might not work with multiple worker threads",
                     :worker_threads => thread_count, :filters => plugins)
        thread_count # allow user to force this even if there are unsafe filters
      end
    else
      thread_count || default
    end
  end

  def filters?
    return @filters.any?
  end

  # Run the pipeline to completion: start workers and inputs, block until
  # all inputs stop, then shut down flusher and workers. Returns 0 (exit code).
  def run
    LogStash::Util.set_thread_name("[#{pipeline_id}]-pipeline-manager")
    @logger.terminal(LogStash::Util::DefaultsPrinter.print(@settings))

    start_workers

    @logger.info("Pipeline started")
    @logger.terminal("Logstash startup completed")

    # Block until all inputs have stopped
    # Generally this happens if SIGINT is sent and `shutdown` is called from an external thread

    transition_to_running
    start_flusher # Launches a non-blocking thread for flush events
    wait_inputs
    transition_to_stopped

    @logger.info("Input plugins stopped! Will shutdown filter/output workers.")

    shutdown_flusher
    shutdown_workers

    @logger.info("Pipeline shutdown complete.")
    @logger.terminal("Logstash shutdown completed")

    # exit code
    return 0
  end # def run

  def transition_to_running
    @running.make_true
  end

  def transition_to_stopped
    @running.make_false
  end

  def running?
    @running.true?
  end

  def stopped?
    @running.false?
  end

  # Register all plugins, then spawn the worker threads. Always flips
  # @ready to true, even on failure (see comment in the ensure block).
  def start_workers
    @inflight_batches = {}

    @worker_threads.clear # In case we're restarting the pipeline
    begin
      start_inputs
      @outputs.each {|o| o.register }
      @filters.each {|f| f.register}

      pipeline_workers = safe_pipeline_worker_count
      batch_size = @settings[:pipeline_batch_size]
      batch_delay = @settings[:pipeline_batch_delay]
      max_inflight = batch_size * pipeline_workers
      @logger.info("Starting pipeline",
                   :id => self.pipeline_id,
                   :pipeline_workers => pipeline_workers,
                   :batch_size => batch_size,
                   :batch_delay => batch_delay,
                   :max_inflight => max_inflight)
      if max_inflight > MAX_INFLIGHT_WARN_THRESHOLD
        @logger.warn "CAUTION: Recommended inflight events max exceeded! Logstash will run with up to #{max_inflight} events in memory in your current configuration. If your message sizes are large this may cause instability with the default heap size. Please consider setting a non-standard heap size, changing the batch size (currently #{batch_size}), or changing the number of pipeline workers (currently #{pipeline_workers})"
      end

      pipeline_workers.times do |t|
        @worker_threads << Thread.new do
          LogStash::Util.set_thread_name("[#{pipeline_id}]>worker#{t}")
          worker_loop(batch_size, batch_delay)
        end
      end
    ensure
      # it is important to guarantee @ready to be true after the startup sequence has been completed
      # to potentially unblock the shutdown method which may be waiting on @ready to proceed
      @ready.make_true
    end
  end

  # Main body of what a worker thread does
  # Repeatedly takes batches off the queue, filters, then outputs them
  def worker_loop(batch_size, batch_delay)
    running = true

    while running
      # To understand the purpose behind this synchronize please read the body of take_batch
      input_batch, signal = @input_queue_pop_mutex.synchronize { take_batch(batch_size, batch_delay) }
      running = false if signal == LogStash::SHUTDOWN

      @events_consumed.increment(input_batch.size)

      filtered_batch = filter_batch(input_batch)

      if signal # Flush on SHUTDOWN or FLUSH
        flush_options = (signal == LogStash::SHUTDOWN) ? {:final => true} : {}
        flush_filters_to_batch(filtered_batch, flush_options)
      end

      @events_filtered.increment(filtered_batch.size)

      output_batch(filtered_batch)

      inflight_batches_synchronize { set_current_thread_inflight_batch(nil) }
    end
  end

  # Pull up to batch_size events off the input queue. Returns [batch, signal]
  # where signal is false, LogStash::FLUSH or LogStash::SHUTDOWN.
  def take_batch(batch_size, batch_delay)
    batch = []
    # Since this is externally synchronized in `worker_loop` we can guarantee that the visibility of an in-flight batch
    # guaranteed to be a full batch not a partial batch
    set_current_thread_inflight_batch(batch)

    signal = false
    batch_size.times do |t|
      # Block for the first event; use a timed poll for the rest so a slow
      # trickle of events doesn't stall the batch indefinitely.
      event = (t == 0) ? @input_queue.take : @input_queue.poll(batch_delay)

      if event.nil?
        next
      elsif event == LogStash::SHUTDOWN || event == LogStash::FLUSH
        # We MUST break here. If a batch consumes two SHUTDOWN events
        # then another worker may have its SHUTDOWN 'stolen', thus blocking
        # the pipeline. We should stop doing work after flush as well.
        signal = event
        break
      else
        batch << event
      end
    end

    [batch, signal]
  end

  # Run filter_func over the batch, dropping cancelled events. Non-Event
  # entries in the batch are ignored.
  def filter_batch(batch)
    batch.reduce([]) do |acc,e|
      if e.is_a?(LogStash::Event)
        filtered = filter_func(e)
        filtered.each {|fe| acc << fe unless fe.cancelled?}
      end
      acc
    end
  rescue Exception => e
    # Plugins authors should manage their own exceptions in the plugin code
    # but if an exception is raised up to the worker thread they are considered
    # fatal and logstash will not recover from this situation.
    #
    # Users need to check their configuration or see if there is a bug in the
    # plugin.
    @logger.error("Exception in pipelineworker, the pipeline stopped processing new events, please check your filter configuration and restart Logstash.",
                  "exception" => e, "backtrace" => e.backtrace)
    raise
  end

  # Take an array of events and send them to the correct output
  def output_batch(batch)
    # Build a mapping of { output_plugin => [events...]}
    outputs_events = batch.reduce(Hash.new { |h, k| h[k] = [] }) do |acc, event|
      # We ask the AST to tell us which outputs to send each event to
      # Then, we stick it in the correct bin

      # output_func should never return anything other than an Array but we have lots of legacy specs
      # that monkeypatch it and return nil. We can deprecate "|| []" after fixing these specs
      outputs_for_event = output_func(event) || []

      outputs_for_event.each { |output| acc[output] << event }
      acc
    end

    # Now that we have our output to event mapping we can just invoke each output
    # once with its list of events
    outputs_events.each { |output, events| output.multi_receive(events) }
  end

  def set_current_thread_inflight_batch(batch)
    @inflight_batches[Thread.current] = batch
  end

  # Yield the inflight-batches map under the same mutex that guards queue
  # pops, so callers see only complete batches.
  def inflight_batches_synchronize
    @input_queue_pop_mutex.synchronize do
      yield(@inflight_batches)
    end
  end

  def wait_inputs
    @input_threads.each(&:join)
  end

  # Clone threadable inputs up to their configured thread count, then
  # register and start every input.
  def start_inputs
    moreinputs = []
    @inputs.each do |input|
      if input.threadable && input.threads > 1
        (input.threads - 1).times do |i|
          moreinputs << input.clone
        end
      end
    end
    @inputs += moreinputs

    @inputs.each do |input|
      input.register
      start_input(input)
    end
  end

  def start_input(plugin)
    @input_threads << Thread.new { inputworker(plugin) }
  end

  # Body of an input thread: run the plugin, restarting it after transient
  # errors and ignoring errors raised during shutdown.
  def inputworker(plugin)
    LogStash::Util::set_thread_name("[#{pipeline_id}]<#{plugin.class.config_name}")
    begin
      plugin.run(@input_queue)
    rescue => e
      if plugin.stop?
        @logger.debug("Input plugin raised exception during shutdown, ignoring it.",
                      :plugin => plugin.class.config_name, :exception => e,
                      :backtrace => e.backtrace)
        return
      end

      # otherwise, report error and restart
      if @logger.debug?
        @logger.error(I18n.t("logstash.pipeline.worker-error-debug",
                             :plugin => plugin.inspect, :error => e.to_s,
                             :exception => e.class,
                             :stacktrace => e.backtrace.join("\n")))
      else
        @logger.error(I18n.t("logstash.pipeline.worker-error",
                             :plugin => plugin.inspect, :error => e))
      end

      # Assuming the failure that caused this exception is transient,
      # let's sleep for a bit and execute #run again
      sleep(1)
      retry
    ensure
      plugin.do_close
    end
  end # def inputworker

  # initiate the pipeline shutdown sequence
  # this method is intended to be called from outside the pipeline thread
  # @param before_stop [Proc] code block called before performing stop operation on input plugins
  def shutdown(&before_stop)
    # shutdown can only start once the pipeline has completed its startup.
    # avoid potential race condition between the startup sequence and this
    # shutdown method which can be called from another thread at any time
    sleep(0.1) while !ready?

    # TODO: should we also check against calling shutdown multiple times concurrently?

    before_stop.call if block_given?

    @logger.info "Closing inputs"
    @inputs.each(&:do_stop)
    @logger.info "Closed inputs"
  end # def shutdown

  # After `shutdown` is called from an external thread this is called from the main thread to
  # tell the worker threads to stop and then block until they've fully stopped
  # This also stops all filter and output plugins
  def shutdown_workers
    # Each worker thread will receive this exactly once!
    @worker_threads.each do |t|
      @logger.debug("Pushing shutdown", :thread => t)
      @input_queue.push(LogStash::SHUTDOWN)
    end

    @worker_threads.each do |t|
      @logger.debug("Shutdown waiting for worker thread #{t}")
      t.join
    end

    @filters.each(&:do_close)
    @outputs.each(&:do_close)
  end

  # Instantiate a plugin by type and name; outputs are wrapped in an
  # OutputDelegator so they can run with multiple workers.
  def plugin(plugin_type, name, *args)
    args << {} if args.empty?

    klass = LogStash::Plugin.lookup(plugin_type, name)

    if plugin_type == "output"
      LogStash::OutputDelegator.new(@logger, klass, default_output_workers, *args)
    else
      klass.new(*args)
    end
  end

  def default_output_workers
    @settings[:pipeline_workers] || @settings[:default_pipeline_workers]
  end

  # for backward compatibility in devutils for the rspec helpers, this method is not used
  # in the pipeline anymore.
  def filter(event, &block)
    # filter_func returns all filtered events, including cancelled ones
    filter_func(event).each { |e| block.call(e) }
  end


  # perform filters flush and yield flushed event to the passed block
  # @param options [Hash]
  # @option options [Boolean] :final => true to signal a final shutdown flush
  def flush_filters(options = {}, &block)
    flushers = options[:final] ? @shutdown_flushers : @periodic_flushers

    flushers.each do |flusher|
      flusher.call(options, &block)
    end
  end

  # Start the background thread that pushes a FLUSH signal every ~5 seconds
  # until the pipeline stops.
  def start_flusher
    # Invariant to help detect improper initialization
    raise "Attempted to start flusher on a stopped pipeline!" if stopped?

    @flusher_thread = Thread.new do
      while Stud.stoppable_sleep(5, 0.1) { stopped? }
        flush
        break if stopped?
      end
    end
  end

  def shutdown_flusher
    @flusher_thread.join
  end

  # Push a FLUSH signal onto the queue unless a flush is already in flight
  # (@flushing is reset to false in flush_filters_to_batch).
  def flush
    if @flushing.compare_and_set(false, true)
      @logger.debug? && @logger.debug("Pushing flush onto pipeline")
      @input_queue.push(LogStash::FLUSH)
    end
  end

  # perform filters flush into the output queue
  # @param options [Hash]
  # @option options [Boolean] :final => true to signal a final shutdown flush
  def flush_filters_to_batch(batch, options = {})
    flush_filters(options) do |event|
      unless event.cancelled?
        @logger.debug? and @logger.debug("Pushing flushed events", :event => event)
        batch << event
      end
    end

    @flushing.set(false)
  end # flush_filters_to_output!

  def plugin_threads_info
    input_threads = @input_threads.select {|t| t.alive? }
    worker_threads = @worker_threads.select {|t| t.alive? }
    (input_threads + worker_threads).map {|t| LogStash::Util.thread_info(t) }
  end

  # Like plugin_threads_info but filtered down to threads that look stalled,
  # with noisy fields stripped from each entry.
  def stalling_threads_info
    plugin_threads_info
      .reject {|t| t["blocked_on"] } # known benign blocking statuses
      .each {|t| t.delete("backtrace") }
      .each {|t| t.delete("blocked_on") }
      .each {|t| t.delete("status") }
  end

  # Sometimes we log stuff that will dump the pipeline which may contain
  # sensitive information (like the raw syntax tree which can contain passwords)
  # We want to hide most of what's in here
  def inspect
    {
      :pipeline_id => @pipeline_id,
      :settings => @settings.inspect,
      :ready => @ready,
      :running => @running,
      :flushing => @flushing
    }
  end

end end