logstash-input-beats 2.0.3 → 2.1.1

@@ -0,0 +1,34 @@
+ # encoding: utf-8
+ require "logstash/inputs/beats_support/event_transform_common"
+ module LogStash::Inputs::BeatsSupport
+   # Take the extracted content from the codec, merge it with the other data
+   # coming from Beats, apply the configured tags, normalize the host and try
+   # to coerce the timestamp if it was provided in the hash.
+   class DecodedEventTransform < EventTransformCommon
+     def transform(event, hash)
+       ts = coerce_ts(hash.delete("@timestamp"))
+
+       event["@timestamp"] = ts unless ts.nil?
+       hash.each { |k, v| event[k] = v }
+       super(event)
+       event.tag("beats_input_codec_#{@input.codec.base_codec.class.config_name}_applied")
+       event
+     end
+
+     private
+     def coerce_ts(ts)
+       return nil if ts.nil?
+       timestamp = LogStash::Timestamp.coerce(ts)
+
+       return timestamp if timestamp
+
+       @logger.warn("Unrecognized @timestamp value, setting current time to @timestamp",
+                    :value => ts.inspect)
+       return nil
+     rescue LogStash::TimestampParserError => e
+       @logger.warn("Error parsing @timestamp string, setting current time to @timestamp",
+                    :value => ts.inspect, :exception => e.message)
+       return nil
+     end
+   end
+ end
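For orientation, here is a minimal usage sketch of the new decoded-event transform. It is not part of the diff: it assumes logstash-core and this plugin are on the load path, and that `input` is a registered LogStash::Inputs::Beats instance.

```ruby
# Hypothetical usage sketch for DecodedEventTransform, mirroring how the input
# drives it once the codec has produced a hash for a decoded payload.
require "logstash/event"
require "logstash/inputs/beats_support/decoded_event_transform"

transform = LogStash::Inputs::BeatsSupport::DecodedEventTransform.new(input)

event = LogStash::Event.new
hash  = { "@timestamp" => "2015-11-10T10:14:38.907Z", "message" => "line 1" }

transform.transform(event, hash)
# The "@timestamp" string is coerced to a LogStash::Timestamp (falling back to
# the current time, with a warning, when it cannot be parsed), the remaining
# keys are copied onto the event, and the codec-specific tag is applied.
```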
@@ -0,0 +1,40 @@
+ # encoding: utf-8
+ module LogStash::Inputs::BeatsSupport
+   # Base transform class: exposes the plugin's `decorate` method,
+   # applies the tags and makes sure we copy the Beat hostname into `host`
+   # for backward compatibility.
+   class EventTransformCommon
+     def initialize(input)
+       @input = input
+       @logger = input.logger
+     end
+
+     # Copies the beat.hostname field into the host field unless
+     # the host field is already defined
+     def copy_beat_hostname(event)
+       host = event["[beat][hostname]"]
+
+       if host && event["host"].nil?
+         event["host"] = host
+       end
+     end
+
+     # This breaks the visibility of the plugin base class's `#decorate`
+     # method: it is protected so we cannot call it directly, but Ruby
+     # lets us work around that with `send`.
+     #
+     # I think the correct behavior would be to allow the plugin to return a
+     # `Decorator` object that we can pass to other objects, since only the
+     # plugin knows the data used to decorate. This would allow a more
+     # component-based workflow.
+     def decorate(event)
+       @input.send(:decorate, event)
+     end
+
+     def transform(event)
+       copy_beat_hostname(event)
+       decorate(event)
+       event
+     end
+   end
+ end
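The backward-compatibility contract kept by `copy_beat_hostname` can be summarized in a short sketch (illustrative only; it assumes logstash-core is loaded and `input` is a registered plugin instance):

```ruby
# Hypothetical sketch of the host-normalization behavior.
require "logstash/event"
require "logstash/inputs/beats_support/event_transform_common"

transform = LogStash::Inputs::BeatsSupport::EventTransformCommon.new(input)

# [beat][hostname] is copied into host when host is not already set...
e1 = LogStash::Event.new("beat" => { "hostname" => "linux01" })
transform.transform(e1)
e1["host"] # => "linux01"

# ...but an existing host field is never overwritten.
e2 = LogStash::Event.new("host" => "linux02", "beat" => { "hostname" => "linux01" })
transform.transform(e2)
e2["host"] # => "linux02"
```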
@@ -0,0 +1,18 @@
+ # encoding: utf-8
+ require "logstash/inputs/beats_support/event_transform_common"
+ module LogStash::Inputs::BeatsSupport
+   # Take the raw output from the library, decorate it with the
+   # tags configured in the plugin and normalize the hostname
+   # for backward compatibility.
+   #
+   # @see [Lumberjack::Beats::Parser]
+   #
+   class RawEventTransform < EventTransformCommon
+     def transform(event)
+       super(event)
+       event.tag("beats_input_raw_event")
+       event
+     end
+   end
+ end
@@ -0,0 +1,36 @@
+ # encoding: utf-8
+ module LogStash::Inputs::BeatsSupport
+   # Wrap the Java `SynchronousQueue` to act as the synchronization mechanism;
+   # this queue can block for a maximum amount of time, a feature Logstash's
+   # queue doesn't implement.
+   #
+   # See proposal for core: https://github.com/elastic/logstash/pull/4408
+   #
+   # See https://docs.oracle.com/javase/7/docs/api/java/util/concurrent/SynchronousQueue.html
+   java_import "java.util.concurrent.SynchronousQueue"
+   java_import "java.util.concurrent.TimeUnit"
+   class SynchronousQueueWithOffer
+     def initialize(timeout, fairness_policy = true)
+       # Set the fairness policy to `FIFO`.
+       #
+       # In the context of the input it makes sense to
+       # deal with the older connection before the newer
+       # one, since the older one will be closer to
+       # reaching the connection timeout.
+       @timeout = timeout
+       @queue = java.util.concurrent.SynchronousQueue.new(fairness_policy)
+     end
+
+     # Returns true if the element was successfully added to the queue;
+     # returns false if the timeout is reached before the element
+     # could be inserted.
+     def offer(element, timeout = nil)
+       @queue.offer(element, timeout || @timeout, java.util.concurrent.TimeUnit::SECONDS)
+     end
+
+     def take
+       @queue.take
+     end
+   end
+ end
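A short usage sketch of the offer/take semantics described above (illustrative only; it needs JRuby, which is Logstash's runtime, and assumes the plugin is on the load path):

```ruby
# Hypothetical sketch: a SynchronousQueue has no capacity, so offer only
# succeeds while another thread is blocked in take; otherwise it gives up
# after the timeout and returns false.
require "logstash/inputs/beats_support/synchronous_queue_with_offer"

queue = LogStash::Inputs::BeatsSupport::SynchronousQueueWithOffer.new(5) # 5s default timeout

consumer = Thread.new do
  item = queue.take          # blocks until a producer hands off an element
  puts "consumed #{item}"
end

queue.offer("payload")       # => true, handed off to the waiting consumer
consumer.join

queue.offer("payload", 1)    # => false after ~1 second, nobody is taking
```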
@@ -57,6 +57,13 @@ module Lumberjack module Beats
  end
  end # def initialize

+ # Server#run lets the library manage all of the connection
+ # threads; this handling is quite minimal and doesn't cover
+ # every connection/disconnection case.
+ #
+ # For more granular control over the connections you should manage
+ # them yourself; see the Server#accept method, which returns a
+ # Connection instance.
  def run(&block)
  while !closed?
  connection = accept
@@ -67,7 +74,14 @@ module Lumberjack module Beats
  next unless connection

  Thread.new(connection) do |connection|
- connection.run(&block)
+   begin
+     connection.run(&block)
+   rescue Lumberjack::Beats::Connection::ConnectionClosed
+     # Connection will raise a wrapped exception upstream, but when the
+     # threads are managed by the library we can simply ignore it.
+     #
+     # Note: this follows the previous behavior of failing silently.
+   end
  end
  end
  end # def run
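As the new comment suggests, callers that need finer-grained control can drive connections themselves rather than relying on Server#run. Below is a rough sketch of that pattern, rescuing the new ConnectionClosed wrapper; `server` is assumed to be a Lumberjack::Beats::Server, `handle` and `logger` are hypothetical placeholders, and the block arguments are assumed from the shape of the events used by the new spec.

```ruby
# Illustrative sketch only: manage connection threads yourself via Server#accept.
while !server.closed?
  connection = server.accept
  next unless connection

  Thread.new(connection) do |conn|
    begin
      # Block arguments assumed: a decoded map plus an identity stream.
      conn.run { |map, identity_stream| handle(map, identity_stream) }
    rescue Lumberjack::Beats::Connection::ConnectionClosed => e
      # The wrapper keeps the original cause around for reporting.
      logger.debug("connection closed",
                   :peer => conn.peer,
                   :cause => e.original_exception.class.name)
    end
  end
end
```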
@@ -132,6 +146,7 @@ module Lumberjack module Beats
  PROTOCOL_VERSION_2 = "2".ord

  SUPPORTED_PROTOCOLS = [PROTOCOL_VERSION_1, PROTOCOL_VERSION_2]
+ class UnsupportedProtocol < StandardError; end

  def initialize
  @buffer_offset = 0
@@ -222,7 +237,7 @@ module Lumberjack module Beats
  if supported_protocol?(version)
  yield :version, version
  else
- raise "unsupported protocol #{version}"
+ raise UnsupportedProtocol, "unsupported protocol #{version}"
  end
  end

@@ -298,9 +313,37 @@ module Lumberjack module Beats
  end # class Parser

  class Connection
+   # Wrap the original exception in a common one to make upstream
+   # handling and reporting easier, while keeping the meaning of the
+   # original exception.
+   class ConnectionClosed < StandardError
+     attr_reader :original_exception
+
+     def initialize(original_exception)
+       super(original_exception)
+
+       @original_exception = original_exception
+       set_backtrace(original_exception.backtrace) if original_exception
+     end
+
+     def to_s
+       "#{self.class.name} wrapping: #{original_exception.class.name}, #{super.to_s}"
+     end
+   end
+
  READ_SIZE = 16384
+ PEER_INFORMATION_NOT_AVAILABLE = "<PEER INFORMATION NOT AVAILABLE>"
+ RESCUED_CONNECTION_EXCEPTIONS = [
+   EOFError,
+   OpenSSL::SSL::SSLError,
+   IOError,
+   Errno::ECONNRESET,
+   Errno::EPIPE,
+   Lumberjack::Beats::Parser::UnsupportedProtocol
+ ]

  attr_accessor :server
+ attr_reader :peer

  def initialize(fd, server)
  @parser = Parser.new
@@ -308,23 +351,27 @@ module Lumberjack module Beats

  @server = server
  @ack_handler = nil
- end
-
- def peer
- "#{@fd.peeraddr[3]}:#{@fd.peeraddr[1]}"
+
+   # Fetch the details of the host before reading anything from the socket
+   # so we can use that information when debugging connection issues with
+   # remote hosts.
+   begin
+     @peer = "#{@fd.peeraddr[3]}:#{@fd.peeraddr[1]}"
+   rescue IOError
+     # This can happen if the connection is dropped or closed before
+     # fetching the host details; fall back to a generic string.
+     @peer = PEER_INFORMATION_NOT_AVAILABLE
+   end
  end

  def run(&block)
  while !server.closed?
  read_socket(&block)
  end
- rescue EOFError,
- OpenSSL::SSL::SSLError,
- IOError,
- Errno::ECONNRESET,
- Errno::EPIPE
+ rescue *RESCUED_CONNECTION_EXCEPTIONS => e
  # EOF or other read errors, only action is to shutdown which we'll do in
  # 'ensure'
+   raise ConnectionClosed.new(e)
  rescue
  # when the server is shutting down we can safely ignore any exceptions
  # On windows, we can get a `SystemCallErr`
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
  s.name = "logstash-input-beats"
- s.version = "2.0.3"
+ s.version = "2.1.1"
  s.licenses = ["Apache License (2.0)"]
  s.summary = "Receive events using the lumberjack protocol."
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
  s.require_paths = ["lib"]

  # Files
- s.files = Dir["lib/**/*","spec/**/*","vendor/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT"]
+ s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT"]

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
@@ -23,7 +23,8 @@ Gem::Specification.new do |s|

  s.add_runtime_dependency "logstash-codec-plain"
  s.add_runtime_dependency "concurrent-ruby", "~> 0.9.2"
- s.add_runtime_dependency "logstash-codec-multiline", "~> 2.0.3"
+ s.add_runtime_dependency "thread_safe", "~> 0.3.5"
+ s.add_runtime_dependency "logstash-codec-multiline", "~> 2.0.5"

  s.add_development_dependency "flores", "~>0.0.6"
  s.add_development_dependency "rspec"
@@ -70,140 +70,49 @@ describe LogStash::Inputs::Beats do
  end
  end

- describe "#processing of events" do
-   subject(:beats) { LogStash::Inputs::Beats.new(config) }
-   let(:codec) { LogStash::Codecs::Multiline.new("pattern" => '\n', "what" => "previous") }
+ context "#handle_new_connection" do
+   let(:config) {{ "ssl" => false, "port" => 0, "type" => "example", "tags" => "beats" }}
+   let(:plugin) { LogStash::Inputs::Beats.new(config) }
+   let(:connection) { DummyConnection.new(events) }
+   let(:buffer_queue) { DummyNeverBlockedQueue.new }
+   let(:pipeline_queue) { [] }
+   let(:events) {
+     [
+       { :map => { "id" => 1 }, :identity_stream => "/var/log/message" },
+       { :map => { "id" => 2 }, :identity_stream => "/var/log/message_2" }
+     ]
+   }
+
+   before :each do
+     plugin.register
+
+     # Even if we don't mock the actual socket work,
+     # we have to call run because it correctly sets up the
+     # queue instance variables
+     t = Thread.new do
+       plugin.run(pipeline_queue)
+     end

-   let(:config) do
-     { "port" => port, "ssl_certificate" => certificate.ssl_cert, "ssl_key" => certificate.ssl_key,
-       "type" => "example", "codec" => codec }
+     sleep(0.1) until t.status == "run"
  end

-   before do
-     beats.register
+   after :each do
+     plugin.stop
  end

-   context "#create_event" do
-     let(:config) { super.merge({ "add_field" => { "foo" => "bar", "[@metadata][hidden]" => "secret"}, "tags" => ["bonjour"]}) }
-     let(:event_map) { { "hello" => "world" } }
-     let(:codec) { LogStash::Codecs::Plain.new }
-     let(:identity_stream) { "custom-type-input_type-source" }
-
-     context "without a `target_field` defined" do
-       it "decorates the event" do
-         beats.create_event(event_map, identity_stream) do |event|
-           expect(event["foo"]).to eq("bar")
-           expect(event["[@metadata][hidden]"]).to eq("secret")
-           expect(event["tags"]).to include("bonjour")
-         end
-       end
-     end
-
-     context "with a `target_field` defined" do
-       let(:event_map) { super.merge({"message" => "with a field"}) }
-
-       it "decorates the event" do
-         beats.create_event(event_map, identity_stream) do |event|
-           expect(event["foo"]).to eq("bar")
-           expect(event["[@metadata][hidden]"]).to eq("secret")
-           expect(event["tags"]).to include("bonjour")
-         end
-       end
-     end
-
-     context "when data is buffered in the codec" do
-       let(:codec) { LogStash::Codecs::Multiline.new("pattern" => '^\s', "what" => "previous") }
-       let(:event_map) { {"message" => "hello?", "tags" => ["syslog"]} }
-
-       it "returns nil" do
-         expect { |b| beats.create_event(event_map, identity_stream, &b) }.not_to yield_control
-       end
-     end
-
-     context "multiline" do
-       let(:codec) { LogStash::Codecs::Multiline.new("pattern" => '^2015', "what" => "previous", "negate" => true) }
-       let(:events_map) do
-         [
-           { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "2015-11-10 10:14:38,907 line 1" },
-           { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "line 1.1" },
-           { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "2015-11-10 10:16:38,907 line 2" },
-           { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "line 2.1" },
-           { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "line 2.2" },
-           { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "line 2.3" },
-           { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "2015-11-10 10:18:38,907 line 3" }
-         ]
-       end
-
-       let(:queue) { [] }
-       before do
-         Thread.new { beats.run(queue) }
-         sleep(0.1)
-       end
-
-       it "should correctly merge multiple events" do
-         events_map.each { |map| beats.create_event(map, identity_stream) { |e| queue << e } }
-         # This cannot currently work without explicitely call a flush
-         # the flush is never timebased, if no new data is coming in we wont flush the buffer
-         # https://github.com/logstash-plugins/logstash-codec-multiline/issues/11
-         beats.stop
-         expect(queue.size).to eq(3)
-
-         expect(queue.collect { |e| e["message"] }).to include("2015-11-10 10:14:38,907 line 1\nline 1.1",
-                                                               "2015-11-10 10:16:38,907 line 2\nline 2.1\nline 2.2\nline 2.3",
-                                                               "2015-11-10 10:18:38,907 line 3")
-       end
-     end
-
-     context "with a beat.hostname field" do
-       let(:event_map) { {"message" => "hello", "beat" => {"hostname" => "linux01"} } }
-
-       it "copies it to the host field" do
-         beats.create_event(event_map, identity_stream) do |event|
-           expect(event["host"]).to eq("linux01")
-         end
-       end
+   context "when an exception occurs" do
+     let(:connection_handler) { LogStash::Inputs::BeatsSupport::ConnectionHandler.new(connection, plugin, buffer_queue) }
+     before do
+       expect(LogStash::Inputs::BeatsSupport::ConnectionHandler).to receive(:new).with(any_args).and_return(connection_handler)
  end

-     context "with a beat.hostname field but without the message" do
-       let(:event_map) { {"beat" => {"hostname" => "linux01"} } }
+     it "calls flush on the handler and tags the events" do
+       expect(connection_handler).to receive(:accept) { raise LogStash::Inputs::Beats::InsertingToQueueTakeTooLong }
+       expect(connection_handler).to receive(:flush).and_yield(LogStash::Event.new)
+       plugin.handle_new_connection(connection)

-       it "copies it to the host field" do
-         beats.create_event(event_map, identity_stream) do |event|
-           expect(event["host"]).to eq("linux01")
-         end
-       end
-     end
-
-     context "without a beat.hostname field" do
-       let(:event_map) { {"message" => "hello", "beat" => {"name" => "linux01"} } }
-
-       it "should not add a host field" do
-         beats.create_event(event_map, identity_stream) do |event|
-           expect(event["beat"]["name"]).to eq("linux01")
-           expect(event["host"]).to be_nil
-         end
-       end
-     end
-
-     context "with a beat.hostname and host fields" do
-       let(:event_map) { {"message" => "hello", "host" => "linux02", "beat" => {"hostname" => "linux01"} } }
-
-       it "should not overwrite host" do
-         beats.create_event(event_map, identity_stream) do |event|
-           expect(event["host"]).to eq("linux02")
-         end
-       end
-     end
-
-     context "with a host field in the message" do
-       let(:codec) { LogStash::Codecs::JSON.new }
-       let(:event_map) { {"message" => '{"host": "linux02"}', "beat" => {"hostname" => "linux01"} } }
-
-       it "should take the host from the JSON message" do
-         beats.create_event(event_map, identity_stream) do
-           expect(event["host"]).to eq("linux02")
-         end
-       end
+       event = pipeline_queue.shift
+       expect(event["tags"]).to include("beats_input_flushed_by_end_of_connection")
  end
  end
  end