logstash-input-beats 2.0.3 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 86b68287fce7667ece60ab81196daf597a34bf0f
4
- data.tar.gz: eba32adca86f4b4fa7c37c7755ab524498e87b8f
3
+ metadata.gz: dcc82edf516cc926764c9bf592b6b4240d2b250a
4
+ data.tar.gz: afd96ca6287561a27ef83d1e5070785cc507da00
5
5
  SHA512:
6
- metadata.gz: 2199de568cb60a38bf73f0fdf61a24ff75a5596e2aaf984f1c468f6c1d8ac505e4d7717e7bf77ff4af2579678b57c1fc48125f3a72a02e86d302d0184ab3d514
7
- data.tar.gz: 86bae83fb7c1767eec85021c47c7de6b003ed725fa5dfb6e8b2f64e2fa3fe1597e70d04e356145b836c8d991545c7733949bd420d41a300b39898d7a587cbe4d
6
+ metadata.gz: 9a4d8d3e817adaad0e49659c713ba864e2d32837ca39a751cb78d026bc71b215564048e7db7912934a551ff60c0aa2dc3ae52ae06b5f9ee4d4bd25814e4277f2
7
+ data.tar.gz: 0416a16c50b648351929119370f8ea513ff2dfd1657afd47b7332e6ef3758c3381505ce639ba122790024a638272dd91e766f43fd0cd277143b32e66382f7578
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ # 2.1.1
2
+ - Release a new version of the gem that doesn't include any other gems; 2.1.0 is yanked from rubygems
3
+ # 2.1.0
4
+ - Refactor of the code to make it easier to unit test
5
+ - Fix a concurrency error under high load on the SizeQueue #37
6
+ - Drop the internal SizeQueue to rely on Java Synchronous Queue
7
+ - Remove the majority of the nested blocks
8
+ - Move the CircuitBreaker inside an internal namespace so it doesn't conflict with the input lumberjack
9
+ - Add more debugging log statements
10
+ - Flush the codec when a disconnect happens
11
+ - Tag/Decorate the event when a shutdown occurs.
12
+ - The name of the threads managed by the input beat are now meaningful.
1
13
  # 2.0.3
2
14
  - Reduce the size of the gem by removing vendor jars
3
15
  # 2.0.2
@@ -5,6 +5,15 @@ require "logstash/timestamp"
5
5
  require "lumberjack/beats"
6
6
  require "lumberjack/beats/server"
7
7
  require "logstash/codecs/identity_map_codec"
8
+ require "logstash/inputs/beats_support/circuit_breaker"
9
+ require "logstash/inputs/beats_support/codec_callback_listener"
10
+ require "logstash/inputs/beats_support/connection_handler"
11
+ require "logstash/inputs/beats_support/event_transform_common"
12
+ require "logstash/inputs/beats_support/decoded_event_transform"
13
+ require "logstash/inputs/beats_support/raw_event_transform"
14
+ require "logstash/inputs/beats_support/synchronous_queue_with_offer"
15
+ require "logstash/util"
16
+ require "thread_safe"
8
17
 
9
18
  # use Logstash provided json decoder
10
19
  Lumberjack::Beats::json = LogStash::Json
@@ -13,7 +22,28 @@ Lumberjack::Beats::json = LogStash::Json
13
22
  #
14
23
  # https://github.com/elastic/filebeat[filebeat]
15
24
  #
25
+
26
+ class LogStash::Codecs::Base
27
+ # This monkey patch adds a callback-based
28
+ # flow to the codec until it's shipped with core.
29
+ # This gives greater flexibility to the implementation by
30
+ # sending more data to the actual block.
31
+ if !method_defined?(:accept)
32
+ def accept(listener)
33
+ decode(listener.data) do |event|
34
+ listener.process_event(event)
35
+ end
36
+ end
37
+ end
38
+ if !method_defined?(:auto_flush)
39
+ def auto_flush
40
+ end
41
+ end
42
+ end
43
+
16
44
  class LogStash::Inputs::Beats < LogStash::Inputs::Base
45
+ class InsertingToQueueTakeTooLong < Exception; end
46
+
17
47
  config_name "beats"
18
48
 
19
49
  default :codec, "plain"
@@ -46,41 +76,37 @@ class LogStash::Inputs::Beats < LogStash::Inputs::Base
46
76
  config :target_field_for_codec, :validate => :string, :default => "message"
47
77
 
48
78
  # TODO(sissel): Add CA to authenticate clients with.
49
- BUFFERED_QUEUE_SIZE = 1
50
79
  RECONNECT_BACKOFF_SLEEP = 0.5
51
80
 
52
81
  def register
53
- require "concurrent"
54
- require "logstash/circuit_breaker"
55
- require "logstash/sized_queue_timeout"
56
-
57
82
  if !@ssl
58
- @logger.warn("Beats: SSL Certificate will not be used") unless @ssl_certificate.nil?
59
- @logger.warn("Beats: SSL Key will not be used") unless @ssl_key.nil?
83
+ @logger.warn("Beats input: SSL Certificate will not be used") unless @ssl_certificate.nil?
84
+ @logger.warn("Beats input: SSL Key will not be used") unless @ssl_key.nil?
60
85
  elsif !ssl_configured?
61
86
  raise LogStash::ConfigurationError, "Certificate or Certificate Key not configured"
62
87
  end
63
88
 
64
- @logger.info("Starting Beats input listener", :address => "#{@host}:#{@port}")
89
+ @logger.info("Beats inputs: Starting input listener", :address => "#{@host}:#{@port}")
65
90
  @lumberjack = Lumberjack::Beats::Server.new(:address => @host, :port => @port,
66
91
  :ssl => @ssl, :ssl_certificate => @ssl_certificate, :ssl_key => @ssl_key,
67
92
  :ssl_key_passphrase => @ssl_key_passphrase)
68
93
 
69
- # Create a reusable threadpool, we do not limit the number of connections
70
- # to the input, the circuit breaker with the timeout should take care
71
- # of `blocked` threads and prevent logstash to go oom.
72
- @threadpool = Concurrent::CachedThreadPool.new(:idletime => 15)
73
-
74
94
  # in 1.5 the main SizeQueue doesnt have the concept of timeout
75
95
  # We are using a small plugin buffer to move events to the internal queue
76
- @buffered_queue = LogStash::SizedQueueTimeout.new(BUFFERED_QUEUE_SIZE)
96
+ @buffered_queue = LogStash::Inputs::BeatsSupport::SynchronousQueueWithOffer.new(@congestion_threshold)
77
97
 
78
- @circuit_breaker = LogStash::CircuitBreaker.new("Beats input",
79
- :exceptions => [LogStash::SizedQueueTimeout::TimeoutError])
98
+ @circuit_breaker = LogStash::Inputs::BeatsSupport::CircuitBreaker.new("Beats input",
99
+ :exceptions => [InsertingToQueueTakeTooLong])
80
100
 
81
101
  # wrap the configured codec to support identity stream
82
102
  # from the producers
83
103
  @codec = LogStash::Codecs::IdentityMapCodec.new(@codec)
104
+
105
+ # Keep a list of active connections so we can flush their codec on shutdown
106
+
107
+ # Use threadsafe gem, since we have a strict dependency on concurrent-ruby 0.9.2
108
+ # in the core
109
+ @connections_list = ThreadSafe::Hash.new
84
110
  end # def register
85
111
 
86
112
  def ssl_configured?
@@ -99,104 +125,82 @@ class LogStash::Inputs::Beats < LogStash::Inputs::Base
99
125
  # Wrapping the accept call into a CircuitBreaker
100
126
  if @circuit_breaker.closed?
101
127
  connection = @lumberjack.accept # call that creates a new connection
102
- next if connection.nil? # if the connection is nil the connection was close.
103
-
104
- invoke(connection) do |event|
105
- if stop?
106
- connection.close
107
- break
108
- end
109
-
110
- begin
111
- @circuit_breaker.execute {
112
- @buffered_queue.push(event, @congestion_threshold)
113
- }
114
- rescue => e
115
- raise e
116
- end
128
+ # if the connection is nil the connection was closed upstream,
129
+ # so we will try in another iteration to recover or stop.
130
+ next if connection.nil?
131
+
132
+ Thread.new do
133
+ handle_new_connection(connection)
117
134
  end
118
135
  else
119
- @logger.warn("Beats input: the pipeline is blocked, temporary refusing new connection.")
136
+ @logger.warn("Beats input: the pipeline is blocked, temporary refusing new connection.",
137
+ :reconnect_backoff_sleep => RECONNECT_BACKOFF_SLEEP)
120
138
  sleep(RECONNECT_BACKOFF_SLEEP)
121
139
  end
122
140
  end
123
141
  end # def run
124
142
 
125
- public
126
143
  def stop
127
- # we may have some stuff in the buffer
128
- @codec.flush { |event| @output_queue << event }
144
+ @logger.debug("Beats input: stopping the plugin")
145
+
129
146
  @lumberjack.close rescue nil
130
- end
131
147
 
132
- public
133
- def create_event(map, identity_stream, &block)
134
- # Filebeats uses the `message` key and LSF `line`
135
- target_field = target_field_for_codec ? map.delete(target_field_for_codec) : nil
136
-
137
- if target_field.nil?
138
- event = LogStash::Event.new(map)
139
- copy_beat_hostname(event)
140
- decorate(event)
141
- block.call(event)
142
- else
143
- # All codecs expects to work on string
144
- @codec.decode(target_field.to_s, identity_stream) do |decoded|
145
- ts = coerce_ts(map.delete("@timestamp"))
146
- decoded["@timestamp"] = ts unless ts.nil?
147
- map.each { |k, v| decoded[k] = v }
148
- copy_beat_hostname(decoded)
149
- decorate(decoded)
150
- block.call(decoded)
148
+ # we may have some stuff in the codec buffer
149
+ transformer = LogStash::Inputs::BeatsSupport::EventTransformCommon.new(self)
150
+
151
+ # Go through all the active connection and flush their
152
+ # codec content, some context data could be lost in this case
153
+ # but at least the events main data would be persisted.
154
+ @connections_list.each do |_, connection_handler|
155
+ connection_handler.flush do |event|
156
+ # We might loose some context of the
157
+ transformer.transform(event)
158
+ event.tag("beats_input_flushed_by_logtash_shutdown")
159
+ @output_queue << event
151
160
  end
152
161
  end
153
- end
154
162
 
155
- # Copies the beat.hostname field into the host field unless
156
- # the host field is already defined
157
- private
158
- def copy_beat_hostname(event)
159
- host = event["beat"] ? event["beat"]["hostname"] : nil
160
- if host && event["host"].nil?
161
- event["host"] = host
162
- end
163
+ @logger.debug("Beats input: stopped")
163
164
  end
164
165
 
165
- private
166
- def coerce_ts(ts)
167
- return nil if ts.nil?
168
- timestamp = LogStash::Timestamp.coerce(ts)
169
- return timestamp if timestamp
170
-
171
- @logger.warn("Unrecognized @timestamp value, setting current time to @timestamp",
172
- :value => ts.inspect)
173
- rescue LogStash::TimestampParserError => e
174
- @logger.warn("Error parsing @timestamp string, setting current time to @timestamp",
175
- :value => ts.inspect, :exception => e.message)
176
- end
177
-
178
- private
179
- def invoke(connection, &block)
180
- @threadpool.post do
181
- begin
182
- # If any errors occur in from the events the connection should be closed in the
183
- # library ensure block and the exception will be handled here
184
- connection.run do |map, identity_stream|
185
- create_event(map, identity_stream, &block)
186
- end
187
-
188
- # When too many errors happen inside the circuit breaker it will throw
189
- # this exception and start refusing connection. The bubbling of theses
190
- # exceptions make sure that the lumberjack library will close the current
191
- # connection which will force the client to reconnect and restransmit
192
- # his payload.
193
- rescue LogStash::CircuitBreaker::OpenBreaker,
194
- LogStash::CircuitBreaker::HalfOpenBreaker => e
195
- logger.warn("Beats input: The circuit breaker has detected a slowdown or stall in the pipeline, the input is closing the current connection and rejecting new connection until the pipeline recover.", :exception => e.class)
196
- rescue => e # If we have a malformed packet we should handle that so the input doesn't crash completely.
197
- @logger.error("Beats input: unhandled exception", :exception => e, :backtrace => e.backtrace)
198
- end
166
+ # This Method is called inside a new thread
167
+ def handle_new_connection(connection)
168
+ logger.debug? && logger.debug("Beats inputs: accepting a new connection",
169
+ :peer => connection.peer)
170
+
171
+ LogStash::Util.set_thread_name("[beats-input]>connection-#{connection.peer}")
172
+
173
+ connection_handler = LogStash::Inputs::BeatsSupport::ConnectionHandler.new(connection, self, @buffered_queue)
174
+ @connections_list[connection] = connection_handler
175
+
176
+ # All the errors handling is done here
177
+ @circuit_breaker.execute { connection_handler.accept }
178
+ rescue Lumberjack::Beats::Connection::ConnectionClosed => e
179
+ logger.warn("Beats Input: Remote connection closed",
180
+ :peer => connection.peer,
181
+ :exception => e)
182
+ rescue LogStash::Inputs::BeatsSupport::CircuitBreaker::OpenBreaker,
183
+ LogStash::Inputs::BeatsSupport::CircuitBreaker::HalfOpenBreaker => e
184
+ logger.warn("Beats input: The circuit breaker has detected a slowdown or stall in the pipeline, the input is closing the current connection and rejecting new connection until the pipeline recover.",
185
+ :exception => e.class)
186
+ rescue Exception => e # If we have a malformed packet we should handle that so the input doesn't crash completely.
187
+ @logger.error("Beats input: unhandled exception",
188
+ :exception => e,
189
+ :backtrace => e.backtrace)
190
+ ensure
191
+ transformer = LogStash::Inputs::BeatsSupport::EventTransformCommon.new(self)
192
+
193
+ connection_handler.flush do |event|
194
+ # handle the basic event enrichment with tags
195
+ # since at that time we lose all the context
196
+ transformer.transform(event)
197
+ event.tag("beats_input_flushed_by_end_of_connection")
198
+ @output_queue << event
199
199
  end
200
+
201
+ @connections_list.delete(connection)
202
+ @logger.debug? && @logger.debug("Beats input: clearing the connection from the known clients",
203
+ :peer => connection.peer)
200
204
  end
201
205
 
202
206
  # The default Logstash Sizequeue doesn't support timeouts.
@@ -206,9 +210,19 @@ class LogStash::Inputs::Beats < LogStash::Inputs::Base
206
210
  # We are using a proxy queue supporting blocking with a timeout and
207
211
  # this thread take the element from one queue into another one.
208
212
  def start_buffer_broker
209
- @threadpool.post do
210
- while !stop?
211
- @output_queue << @buffered_queue.pop_no_timeout
213
+ Thread.new do
214
+ LogStash::Util.set_thread_name("[beats-input]-buffered-queue-broker")
215
+
216
+ begin
217
+ while !stop?
218
+ @output_queue << @buffered_queue.take
219
+ end
220
+ rescue InterruptionException => e
221
+ # If we are shutting down without waiting the queue to unblock
222
+ # we will get an `InterruptionException` in that context we will not log it.
223
+ @logger.error("Beats input: bufferered queue exception", :exception => e) unless stop?
224
+ rescue => e
225
+ @logger.error("Beats input: unexpected exception", :exception => e)
212
226
  end
213
227
  end
214
228
  end
@@ -1,7 +1,7 @@
1
1
  require "thread"
2
2
  require "cabin"
3
3
 
4
- module LogStash
4
+ module LogStash::Inputs::BeatsSupport
5
5
  # Largely inspired by Martin's fowler circuit breaker
6
6
  class CircuitBreaker
7
7
  # Raised when too many errors has occured and we refuse to execute the block
@@ -57,7 +57,9 @@ module LogStash
57
57
  end
58
58
 
59
59
  def closed?
60
- state == :close || state == :half_open
60
+ current_state = state
61
+
62
+ current_state == :close || current_state == :half_open
61
63
  end
62
64
 
63
65
  private
@@ -73,22 +75,26 @@ module LogStash
73
75
  end
74
76
 
75
77
  def increment_errors(exception)
78
+ t = Time.now
79
+
76
80
  @mutex.synchronize do
77
81
  @errors_count += 1
78
- @last_failure_time = Time.now
79
-
80
- logger.debug("CircuitBreaker increment errors",
81
- :errors_count => @errors_count,
82
- :error_threshold => @error_threshold,
83
- :exception => exception.class,
84
- :message => exception.message) if logger.debug?
82
+ @last_failure_time = t
85
83
  end
84
+
85
+ logger.debug("CircuitBreaker increment errors",
86
+ :errors_count => @errors_count,
87
+ :error_threshold => @error_threshold,
88
+ :exception => exception.class,
89
+ :message => exception.message) if logger.debug?
86
90
  end
87
91
 
88
92
  def state
93
+ t = Time.now
94
+
89
95
  @mutex.synchronize do
90
96
  if @errors_count >= @error_threshold
91
- if Time.now - @last_failure_time > @time_before_retry
97
+ if t - @last_failure_time > @time_before_retry
92
98
  :half_open
93
99
  else
94
100
  :open
@@ -0,0 +1,26 @@
1
+ # encoding: utf-8
2
+ require "logstash/inputs/beats"
3
+
4
+ module LogStash::Inputs::BeatsSupport
5
+ # Use the new callback based approach instead of using blocks
6
+ # so we can retain some context of the execution, and make it easier to test
7
+ class CodecCallbackListener
8
+ attr_accessor :data
9
+ # The path acts as the `stream_identity`,
10
+ # useful when the client is reading multiple files
11
+ attr_accessor :path
12
+
13
+ def initialize(data, hash, path, transformer, queue)
14
+ @data = data
15
+ @hash = hash
16
+ @path = path
17
+ @queue = queue
18
+ @transformer = transformer
19
+ end
20
+
21
+ def process_event(event)
22
+ @transformer.transform(event, @hash)
23
+ raise LogStash::Inputs::Beats::InsertingToQueueTakeTooLong if !@queue.offer(event)
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,79 @@
1
+ # encoding: utf-8
2
+ require "logstash/inputs/beats"
3
+ require "logstash/inputs/beats_support/decoded_event_transform"
4
+ require "logstash/inputs/beats_support/raw_event_transform"
5
+
6
+ module LogStash::Inputs::BeatsSupport
7
+ # Handle the data coming from a connection
8
+ # Decide which Process should be used to decode the data coming
9
+ # from the beat library.
10
+ #
11
+ # - Should we use a codec on specific field?
12
+ # - Should we just take the raw content of the parsed json frame
13
+ class ConnectionHandler
14
+ def initialize(connection, input, queue)
15
+ @connection = connection
16
+
17
+ @input = input
18
+ @queue = queue
19
+ @logger = input.logger
20
+
21
+ # We need to clone the codec per connection, so we can flush a specific
22
+ # codec when a connection is closed.
23
+ @codec = input.codec.dup
24
+
25
+ @nocodec_transformer = RawEventTransform.new(@input)
26
+ @codec_transformer = DecodedEventTransform.new(@input)
27
+ end
28
+
29
+ def accept
30
+ @logger.debug("Beats input: waiting from new events from remote host",
31
+ :peer => @connection.peer)
32
+
33
+ @connection.run { |hash, identity_stream| process(hash, identity_stream) }
34
+ end
35
+
36
+ def process(hash, identity_stream)
37
+ @logger.debug? && @logger.debug("Beats input: new event received",
38
+ :event_hash => hash,
39
+ :identity_stream => identity_stream,
40
+ :peer => @connection.peer)
41
+
42
+ # Filebeats uses the `message` key and LSF `line`
43
+ target_field = @input.target_field_for_codec ? hash.delete(@input.target_field_for_codec) : nil
44
+
45
+ if target_field.nil?
46
+ @logger.debug? && @logger.debug("Beats input: not using the codec for this event, can't find the codec target field",
47
+ :target_field_for_codec => @input.target_field_for_codec,
48
+ :event_hash => hash)
49
+
50
+ event = LogStash::Event.new(hash)
51
+ @nocodec_transformer.transform(event)
52
+
53
+ raise LogStash::Inputs::Beats::InsertingToQueueTakeTooLong if !@queue.offer(event)
54
+ else
55
+ @logger.debug? && @logger.debug("Beats input: decoding this event with the codec",
56
+ :target_field_value => target_field)
57
+
58
+ @codec.accept(CodecCallbackListener.new(target_field,
59
+ hash,
60
+ identity_stream,
61
+ @codec_transformer,
62
+ @queue))
63
+ end
64
+ end
65
+
66
+ # OOB call to flush the codec buffer,
67
+ #
68
+ # This method is a bit tricky to decide when to be called, in the current case,
69
+ # this will be call on any exception raised, either is a circuit breaker or the
70
+ # remote host closed the connection, its better to make sure we clear their
71
+ # data and create duplicates than lose the data.
72
+ def flush(&block)
73
+ @logger.debug? && @logger.debug("Beats input, out of band call for flushing the content of this connection",
74
+ :peer => @connection.peer)
75
+
76
+ @codec.flush(&block)
77
+ end
78
+ end
79
+ end