logstash-input-beats 2.0.3 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 86b68287fce7667ece60ab81196daf597a34bf0f
4
- data.tar.gz: eba32adca86f4b4fa7c37c7755ab524498e87b8f
3
+ metadata.gz: dcc82edf516cc926764c9bf592b6b4240d2b250a
4
+ data.tar.gz: afd96ca6287561a27ef83d1e5070785cc507da00
5
5
  SHA512:
6
- metadata.gz: 2199de568cb60a38bf73f0fdf61a24ff75a5596e2aaf984f1c468f6c1d8ac505e4d7717e7bf77ff4af2579678b57c1fc48125f3a72a02e86d302d0184ab3d514
7
- data.tar.gz: 86bae83fb7c1767eec85021c47c7de6b003ed725fa5dfb6e8b2f64e2fa3fe1597e70d04e356145b836c8d991545c7733949bd420d41a300b39898d7a587cbe4d
6
+ metadata.gz: 9a4d8d3e817adaad0e49659c713ba864e2d32837ca39a751cb78d026bc71b215564048e7db7912934a551ff60c0aa2dc3ae52ae06b5f9ee4d4bd25814e4277f2
7
+ data.tar.gz: 0416a16c50b648351929119370f8ea513ff2dfd1657afd47b7332e6ef3758c3381505ce639ba122790024a638272dd91e766f43fd0cd277143b32e66382f7578
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ # 2.1.1
2
+ - Release a new version of the gem that doesn't include any other gems; 2.1.0 is yanked from rubygems
3
+ # 2.1.0
4
+ - Refactor of the code to make it easier to unit test
5
+ - Fix a concurrency error on high load on the SizeQueue #37
6
+ - Drop the internal SizeQueue to rely on Java Synchronous Queue
7
+ - Remove the majority of the nested blocks
8
+ - Move the CircuitBreaker inside an internal namespace so it doesn't conflict with the input lumberjack
9
+ - Add more debugging log statements
10
+ - Flush the codec when a disconnect happens
11
+ - Tag/Decorate the event when a shutdown occurs.
12
+ - The names of the threads managed by the beats input are now meaningful.
1
13
  # 2.0.3
2
14
  - Reduce the size of the gem by removing vendor jars
3
15
  # 2.0.2
@@ -5,6 +5,15 @@ require "logstash/timestamp"
5
5
  require "lumberjack/beats"
6
6
  require "lumberjack/beats/server"
7
7
  require "logstash/codecs/identity_map_codec"
8
+ require "logstash/inputs/beats_support/circuit_breaker"
9
+ require "logstash/inputs/beats_support/codec_callback_listener"
10
+ require "logstash/inputs/beats_support/connection_handler"
11
+ require "logstash/inputs/beats_support/event_transform_common"
12
+ require "logstash/inputs/beats_support/decoded_event_transform"
13
+ require "logstash/inputs/beats_support/raw_event_transform"
14
+ require "logstash/inputs/beats_support/synchronous_queue_with_offer"
15
+ require "logstash/util"
16
+ require "thread_safe"
8
17
 
9
18
  # use Logstash provided json decoder
10
19
  Lumberjack::Beats::json = LogStash::Json
@@ -13,7 +22,28 @@ Lumberjack::Beats::json = LogStash::Json
13
22
  #
14
23
  # https://github.com/elastic/filebeat[filebeat]
15
24
  #
25
+
26
+ class LogStash::Codecs::Base
27
+ # This monkey patch adds a callback-based
28
+ # flow to the codec until it's shipped with core.
29
+ # This gives greater flexibility to the implementation by
30
+ # sending more data to the actual block.
31
+ if !method_defined?(:accept)
32
+ def accept(listener)
33
+ decode(listener.data) do |event|
34
+ listener.process_event(event)
35
+ end
36
+ end
37
+ end
38
+ if !method_defined?(:auto_flush)
39
+ def auto_flush
40
+ end
41
+ end
42
+ end
43
+
16
44
  class LogStash::Inputs::Beats < LogStash::Inputs::Base
45
+ class InsertingToQueueTakeTooLong < Exception; end
46
+
17
47
  config_name "beats"
18
48
 
19
49
  default :codec, "plain"
@@ -46,41 +76,37 @@ class LogStash::Inputs::Beats < LogStash::Inputs::Base
46
76
  config :target_field_for_codec, :validate => :string, :default => "message"
47
77
 
48
78
  # TODO(sissel): Add CA to authenticate clients with.
49
- BUFFERED_QUEUE_SIZE = 1
50
79
  RECONNECT_BACKOFF_SLEEP = 0.5
51
80
 
52
81
  def register
53
- require "concurrent"
54
- require "logstash/circuit_breaker"
55
- require "logstash/sized_queue_timeout"
56
-
57
82
  if !@ssl
58
- @logger.warn("Beats: SSL Certificate will not be used") unless @ssl_certificate.nil?
59
- @logger.warn("Beats: SSL Key will not be used") unless @ssl_key.nil?
83
+ @logger.warn("Beats input: SSL Certificate will not be used") unless @ssl_certificate.nil?
84
+ @logger.warn("Beats input: SSL Key will not be used") unless @ssl_key.nil?
60
85
  elsif !ssl_configured?
61
86
  raise LogStash::ConfigurationError, "Certificate or Certificate Key not configured"
62
87
  end
63
88
 
64
- @logger.info("Starting Beats input listener", :address => "#{@host}:#{@port}")
89
+ @logger.info("Beats inputs: Starting input listener", :address => "#{@host}:#{@port}")
65
90
  @lumberjack = Lumberjack::Beats::Server.new(:address => @host, :port => @port,
66
91
  :ssl => @ssl, :ssl_certificate => @ssl_certificate, :ssl_key => @ssl_key,
67
92
  :ssl_key_passphrase => @ssl_key_passphrase)
68
93
 
69
- # Create a reusable threadpool, we do not limit the number of connections
70
- # to the input, the circuit breaker with the timeout should take care
71
- # of `blocked` threads and prevent logstash to go oom.
72
- @threadpool = Concurrent::CachedThreadPool.new(:idletime => 15)
73
-
74
94
  # in 1.5 the main SizeQueue doesnt have the concept of timeout
75
95
  # We are using a small plugin buffer to move events to the internal queue
76
- @buffered_queue = LogStash::SizedQueueTimeout.new(BUFFERED_QUEUE_SIZE)
96
+ @buffered_queue = LogStash::Inputs::BeatsSupport::SynchronousQueueWithOffer.new(@congestion_threshold)
77
97
 
78
- @circuit_breaker = LogStash::CircuitBreaker.new("Beats input",
79
- :exceptions => [LogStash::SizedQueueTimeout::TimeoutError])
98
+ @circuit_breaker = LogStash::Inputs::BeatsSupport::CircuitBreaker.new("Beats input",
99
+ :exceptions => [InsertingToQueueTakeTooLong])
80
100
 
81
101
  # wrap the configured codec to support identity stream
82
102
  # from the producers
83
103
  @codec = LogStash::Codecs::IdentityMapCodec.new(@codec)
104
+
105
+ # Keep a list of active connections so we can flush their codec on shutdown
106
+
107
+ # Use threadsafe gem, since we have a strict dependency on concurrent-ruby 0.9.2
108
+ # in the core
109
+ @connections_list = ThreadSafe::Hash.new
84
110
  end # def register
85
111
 
86
112
  def ssl_configured?
@@ -99,104 +125,82 @@ class LogStash::Inputs::Beats < LogStash::Inputs::Base
99
125
  # Wrapping the accept call into a CircuitBreaker
100
126
  if @circuit_breaker.closed?
101
127
  connection = @lumberjack.accept # call that creates a new connection
102
- next if connection.nil? # if the connection is nil the connection was close.
103
-
104
- invoke(connection) do |event|
105
- if stop?
106
- connection.close
107
- break
108
- end
109
-
110
- begin
111
- @circuit_breaker.execute {
112
- @buffered_queue.push(event, @congestion_threshold)
113
- }
114
- rescue => e
115
- raise e
116
- end
128
+ # if the connection is nil the connection was closed upstream,
129
+ # so we will try in another iteration to recover or stop.
130
+ next if connection.nil?
131
+
132
+ Thread.new do
133
+ handle_new_connection(connection)
117
134
  end
118
135
  else
119
- @logger.warn("Beats input: the pipeline is blocked, temporary refusing new connection.")
136
+ @logger.warn("Beats input: the pipeline is blocked, temporary refusing new connection.",
137
+ :reconnect_backoff_sleep => RECONNECT_BACKOFF_SLEEP)
120
138
  sleep(RECONNECT_BACKOFF_SLEEP)
121
139
  end
122
140
  end
123
141
  end # def run
124
142
 
125
- public
126
143
  def stop
127
- # we may have some stuff in the buffer
128
- @codec.flush { |event| @output_queue << event }
144
+ @logger.debug("Beats input: stopping the plugin")
145
+
129
146
  @lumberjack.close rescue nil
130
- end
131
147
 
132
- public
133
- def create_event(map, identity_stream, &block)
134
- # Filebeats uses the `message` key and LSF `line`
135
- target_field = target_field_for_codec ? map.delete(target_field_for_codec) : nil
136
-
137
- if target_field.nil?
138
- event = LogStash::Event.new(map)
139
- copy_beat_hostname(event)
140
- decorate(event)
141
- block.call(event)
142
- else
143
- # All codecs expects to work on string
144
- @codec.decode(target_field.to_s, identity_stream) do |decoded|
145
- ts = coerce_ts(map.delete("@timestamp"))
146
- decoded["@timestamp"] = ts unless ts.nil?
147
- map.each { |k, v| decoded[k] = v }
148
- copy_beat_hostname(decoded)
149
- decorate(decoded)
150
- block.call(decoded)
148
+ # we may have some stuff in the codec buffer
149
+ transformer = LogStash::Inputs::BeatsSupport::EventTransformCommon.new(self)
150
+
151
+ # Go through all the active connections and flush their
152
+ # codec content, some context data could be lost in this case
153
+ # but at least the events main data would be persisted.
154
+ @connections_list.each do |_, connection_handler|
155
+ connection_handler.flush do |event|
156
+ # We might loose some context of the
157
+ transformer.transform(event)
158
+ event.tag("beats_input_flushed_by_logtash_shutdown")
159
+ @output_queue << event
151
160
  end
152
161
  end
153
- end
154
162
 
155
- # Copies the beat.hostname field into the host field unless
156
- # the host field is already defined
157
- private
158
- def copy_beat_hostname(event)
159
- host = event["beat"] ? event["beat"]["hostname"] : nil
160
- if host && event["host"].nil?
161
- event["host"] = host
162
- end
163
+ @logger.debug("Beats input: stopped")
163
164
  end
164
165
 
165
- private
166
- def coerce_ts(ts)
167
- return nil if ts.nil?
168
- timestamp = LogStash::Timestamp.coerce(ts)
169
- return timestamp if timestamp
170
-
171
- @logger.warn("Unrecognized @timestamp value, setting current time to @timestamp",
172
- :value => ts.inspect)
173
- rescue LogStash::TimestampParserError => e
174
- @logger.warn("Error parsing @timestamp string, setting current time to @timestamp",
175
- :value => ts.inspect, :exception => e.message)
176
- end
177
-
178
- private
179
- def invoke(connection, &block)
180
- @threadpool.post do
181
- begin
182
- # If any errors occur in from the events the connection should be closed in the
183
- # library ensure block and the exception will be handled here
184
- connection.run do |map, identity_stream|
185
- create_event(map, identity_stream, &block)
186
- end
187
-
188
- # When too many errors happen inside the circuit breaker it will throw
189
- # this exception and start refusing connection. The bubbling of theses
190
- # exceptions make sure that the lumberjack library will close the current
191
- # connection which will force the client to reconnect and restransmit
192
- # his payload.
193
- rescue LogStash::CircuitBreaker::OpenBreaker,
194
- LogStash::CircuitBreaker::HalfOpenBreaker => e
195
- logger.warn("Beats input: The circuit breaker has detected a slowdown or stall in the pipeline, the input is closing the current connection and rejecting new connection until the pipeline recover.", :exception => e.class)
196
- rescue => e # If we have a malformed packet we should handle that so the input doesn't crash completely.
197
- @logger.error("Beats input: unhandled exception", :exception => e, :backtrace => e.backtrace)
198
- end
166
+ # This Method is called inside a new thread
167
+ def handle_new_connection(connection)
168
+ logger.debug? && logger.debug("Beats inputs: accepting a new connection",
169
+ :peer => connection.peer)
170
+
171
+ LogStash::Util.set_thread_name("[beats-input]>connection-#{connection.peer}")
172
+
173
+ connection_handler = LogStash::Inputs::BeatsSupport::ConnectionHandler.new(connection, self, @buffered_queue)
174
+ @connections_list[connection] = connection_handler
175
+
176
+ # All the errors handling is done here
177
+ @circuit_breaker.execute { connection_handler.accept }
178
+ rescue Lumberjack::Beats::Connection::ConnectionClosed => e
179
+ logger.warn("Beats Input: Remote connection closed",
180
+ :peer => connection.peer,
181
+ :exception => e)
182
+ rescue LogStash::Inputs::BeatsSupport::CircuitBreaker::OpenBreaker,
183
+ LogStash::Inputs::BeatsSupport::CircuitBreaker::HalfOpenBreaker => e
184
+ logger.warn("Beats input: The circuit breaker has detected a slowdown or stall in the pipeline, the input is closing the current connection and rejecting new connection until the pipeline recover.",
185
+ :exception => e.class)
186
+ rescue Exception => e # If we have a malformed packet we should handle that so the input doesn't crash completely.
187
+ @logger.error("Beats input: unhandled exception",
188
+ :exception => e,
189
+ :backtrace => e.backtrace)
190
+ ensure
191
+ transformer = LogStash::Inputs::BeatsSupport::EventTransformCommon.new(self)
192
+
193
+ connection_handler.flush do |event|
194
+ # handle the basic event enrichment with tags
195
+ # since at that time we lose all the context
196
+ transformer.transform(event)
197
+ event.tag("beats_input_flushed_by_end_of_connection")
198
+ @output_queue << event
199
199
  end
200
+
201
+ @connections_list.delete(connection)
202
+ @logger.debug? && @logger.debug("Beats input: clearing the connection from the known clients",
203
+ :peer => connection.peer)
200
204
  end
201
205
 
202
206
  # The default Logstash Sizequeue doesn't support timeouts.
@@ -206,9 +210,19 @@ class LogStash::Inputs::Beats < LogStash::Inputs::Base
206
210
  # We are using a proxy queue supporting blocking with a timeout and
207
211
  # this thread take the element from one queue into another one.
208
212
  def start_buffer_broker
209
- @threadpool.post do
210
- while !stop?
211
- @output_queue << @buffered_queue.pop_no_timeout
213
+ Thread.new do
214
+ LogStash::Util.set_thread_name("[beats-input]-buffered-queue-broker")
215
+
216
+ begin
217
+ while !stop?
218
+ @output_queue << @buffered_queue.take
219
+ end
220
+ rescue InterruptionException => e
221
+ # If we are shutting down without waiting the queue to unblock
222
+ # we will get an `InterruptionException` in that context we will not log it.
223
+ @logger.error("Beats input: bufferered queue exception", :exception => e) unless stop?
224
+ rescue => e
225
+ @logger.error("Beats input: unexpected exception", :exception => e)
212
226
  end
213
227
  end
214
228
  end
@@ -1,7 +1,7 @@
1
1
  require "thread"
2
2
  require "cabin"
3
3
 
4
- module LogStash
4
+ module LogStash::Inputs::BeatsSupport
5
5
  # Largely inspired by Martin's fowler circuit breaker
6
6
  class CircuitBreaker
7
7
  # Raised when too many errors has occured and we refuse to execute the block
@@ -57,7 +57,9 @@ module LogStash
57
57
  end
58
58
 
59
59
  def closed?
60
- state == :close || state == :half_open
60
+ current_state = state
61
+
62
+ current_state == :close || current_state == :half_open
61
63
  end
62
64
 
63
65
  private
@@ -73,22 +75,26 @@ module LogStash
73
75
  end
74
76
 
75
77
  def increment_errors(exception)
78
+ t = Time.now
79
+
76
80
  @mutex.synchronize do
77
81
  @errors_count += 1
78
- @last_failure_time = Time.now
79
-
80
- logger.debug("CircuitBreaker increment errors",
81
- :errors_count => @errors_count,
82
- :error_threshold => @error_threshold,
83
- :exception => exception.class,
84
- :message => exception.message) if logger.debug?
82
+ @last_failure_time = t
85
83
  end
84
+
85
+ logger.debug("CircuitBreaker increment errors",
86
+ :errors_count => @errors_count,
87
+ :error_threshold => @error_threshold,
88
+ :exception => exception.class,
89
+ :message => exception.message) if logger.debug?
86
90
  end
87
91
 
88
92
  def state
93
+ t = Time.now
94
+
89
95
  @mutex.synchronize do
90
96
  if @errors_count >= @error_threshold
91
- if Time.now - @last_failure_time > @time_before_retry
97
+ if t - @last_failure_time > @time_before_retry
92
98
  :half_open
93
99
  else
94
100
  :open
@@ -0,0 +1,26 @@
1
+ # encoding: utf-8
2
+ require "logstash/inputs/beats"
3
+
4
+ module LogStash::Inputs::BeatsSupport
5
+ # Use the new callback based approach instead of using blocks
6
+ # so we can retain some context of the execution, and make it easier to test
7
+ class CodecCallbackListener
8
+ attr_accessor :data
9
+ # The path acts as the `stream_identity`,
10
+ # useful when the client is reading multiple files
11
+ attr_accessor :path
12
+
13
+ def initialize(data, hash, path, transformer, queue)
14
+ @data = data
15
+ @hash = hash
16
+ @path = path
17
+ @queue = queue
18
+ @transformer = transformer
19
+ end
20
+
21
+ def process_event(event)
22
+ @transformer.transform(event, @hash)
23
+ raise LogStash::Inputs::Beats::InsertingToQueueTakeTooLong if !@queue.offer(event)
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,79 @@
1
+ # encoding: utf-8
2
+ require "logstash/inputs/beats"
3
+ require "logstash/inputs/beats_support/decoded_event_transform"
4
+ require "logstash/inputs/beats_support/raw_event_transform"
5
+
6
+ module LogStash::Inputs::BeatsSupport
7
+ # Handle the data coming from a connection
8
+ # Decide which Process should be used to decode the data coming
9
+ # from the beat library.
10
+ #
11
+ # - Should we use a codec on specific field?
12
+ # - Should we just take the raw content of the parsed json frame
13
+ class ConnectionHandler
14
+ def initialize(connection, input, queue)
15
+ @connection = connection
16
+
17
+ @input = input
18
+ @queue = queue
19
+ @logger = input.logger
20
+
21
+ # We need to clone the codec per connection, so we can flush a specific
22
+ # codec when a connection is closed.
23
+ @codec = input.codec.dup
24
+
25
+ @nocodec_transformer = RawEventTransform.new(@input)
26
+ @codec_transformer = DecodedEventTransform.new(@input)
27
+ end
28
+
29
+ def accept
30
+ @logger.debug("Beats input: waiting from new events from remote host",
31
+ :peer => @connection.peer)
32
+
33
+ @connection.run { |hash, identity_stream| process(hash, identity_stream) }
34
+ end
35
+
36
+ def process(hash, identity_stream)
37
+ @logger.debug? && @logger.debug("Beats input: new event received",
38
+ :event_hash => hash,
39
+ :identity_stream => identity_stream,
40
+ :peer => @connection.peer)
41
+
42
+ # Filebeats uses the `message` key and LSF `line`
43
+ target_field = @input.target_field_for_codec ? hash.delete(@input.target_field_for_codec) : nil
44
+
45
+ if target_field.nil?
46
+ @logger.debug? && @logger.debug("Beats input: not using the codec for this event, can't find the codec target field",
47
+ :target_field_for_codec => @input.target_field_for_codec,
48
+ :event_hash => hash)
49
+
50
+ event = LogStash::Event.new(hash)
51
+ @nocodec_transformer.transform(event)
52
+
53
+ raise LogStash::Inputs::Beats::InsertingToQueueTakeTooLong if !@queue.offer(event)
54
+ else
55
+ @logger.debug? && @logger.debug("Beats input: decoding this event with the codec",
56
+ :target_field_value => target_field)
57
+
58
+ @codec.accept(CodecCallbackListener.new(target_field,
59
+ hash,
60
+ identity_stream,
61
+ @codec_transformer,
62
+ @queue))
63
+ end
64
+ end
65
+
66
+ # OOB call to flush the codec buffer,
67
+ #
68
+ # This method is a bit tricky to decide when to be called, in the current case,
69
+ # this will be called on any exception raised, whether it is a circuit breaker or the
70
+ # remote host closed the connection, it's better to make sure we clear their
71
+ # data and create duplicates than lose the data.
72
+ def flush(&block)
73
+ @logger.debug? && @logger.debug("Beats input, out of band call for flushing the content of this connection",
74
+ :peer => @connection.peer)
75
+
76
+ @codec.flush(&block)
77
+ end
78
+ end
79
+ end