logstash-input-beats 2.0.3 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
+ # encoding: utf-8
2
+ require "logstash/inputs/beats_support/event_transform_common"
3
+ module LogStash::Inputs::BeatsSupport
4
+ # Take the extracted content from the codec, merged with the other data coming
5
+ # from beats, apply the configured tags, normalize the host and try to coerce
6
+ # the timestamp if it was provided in the hash.
7
+ class DecodedEventTransform < EventTransformCommon
8
+ def transform(event, hash)
9
+ ts = coerce_ts(hash.delete("@timestamp"))
10
+
11
+ event["@timestamp"] = ts unless ts.nil?
12
+ hash.each { |k, v| event[k] = v }
13
+ super(event)
14
+ event.tag("beats_input_codec_#{@input.codec.base_codec.class.config_name}_applied")
15
+ event
16
+ end
17
+
18
+ private
19
+ def coerce_ts(ts)
20
+ return nil if ts.nil?
21
+ timestamp = LogStash::Timestamp.coerce(ts)
22
+
23
+ return timestamp if timestamp
24
+
25
+ @logger.warn("Unrecognized @timestamp value, setting current time to @timestamp",
26
+ :value => ts.inspect)
27
+ return nil
28
+ rescue LogStash::TimestampParserError => e
29
+ @logger.warn("Error parsing @timestamp string, setting current time to @timestamp",
30
+ :value => ts.inspect, :exception => e.message)
31
+ return nil
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,40 @@
1
+ # encoding: utf-8
2
+ module LogStash::Inputs::BeatsSupport
3
+ # Base Transform class, expose the plugin decorate method,
4
+ # apply the tags and make sure we copy the beat hostname into `host`
5
+ # for backward compatibility.
6
+ class EventTransformCommon
7
+ def initialize(input)
8
+ @input = input
9
+ @logger = input.logger
10
+ end
11
+
12
+ # Copies the beat.hostname field into the host field unless
13
+ # the host field is already defined
14
+ def copy_beat_hostname(event)
15
+ host = event["[beat][hostname]"]
16
+
17
+ if host && event["host"].nil?
18
+ event["host"] = host
19
+ end
20
+ end
21
+
22
+ # This breaks the `#decorate` method visibility of the plugin base
23
+ # class, the method is protected and we cannot access it, but well ruby
24
+ # can let you do all the wrong thing.
25
+ #
26
+ # I think the correct behavior would be to allow the plugin to return a
27
+ # `Decorator` object that we can pass to other objects, since only the
28
+ # plugin knows the data used to decorate. This would allow a more component
29
+ # based workflow.
30
+ def decorate(event)
31
+ @input.send(:decorate, event)
32
+ end
33
+
34
+ def transform(event)
35
+ copy_beat_hostname(event)
36
+ decorate(event)
37
+ event
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,18 @@
1
+ # encoding: utf-8
2
+ require "logstash/inputs/beats_support/event_transform_common"
3
+ module LogStash::Inputs::BeatsSupport
4
+ # Take the raw output from the library, decorate it with
5
+ # the configured tags in the plugins and normalize the hostname
6
+ # for backward compatibility
7
+ #
8
+ #
9
+ # @see [Lumberjack::Beats::Parser]
10
+ #
11
+ class RawEventTransform < EventTransformCommon
12
+ def transform(event)
13
+ super(event)
14
+ event.tag("beats_input_raw_event")
15
+ event
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+ module LogStash::Inputs::BeatsSupport
3
+ # Wrap the `Java SynchronousQueue` to act as the synchronization mechanism
4
+ # this queue can block for a maximum amount of time; logstash's queue
5
+ # doesn't implement that feature.
6
+ #
7
+ # See proposal for core: https://github.com/elastic/logstash/pull/4408
8
+ #
9
+ # See https://docs.oracle.com/javase/7/docs/api/java/util/concurrent/SynchronousQueue.html
10
+ java_import "java.util.concurrent.SynchronousQueue"
11
+ java_import "java.util.concurrent.TimeUnit"
12
+ class SynchronousQueueWithOffer
13
+ def initialize(timeout, fairness_policy = true)
14
+ # set Fairness policy to `FIFO`
15
+ #
16
+ # In the context of the input it makes sense to
17
+ # try to deal with the older connection before
18
+ # the newer one, since the older will be closer to
19
+ # reach the connection timeout.
20
+ #
21
+ @timeout = timeout
22
+ @queue = java.util.concurrent.SynchronousQueue.new(fairness_policy)
23
+ end
24
+
25
+ # This method will return true if it successfully added the element to the queue.
26
+ # If the timeout is reached and it wasn't inserted successfully to
27
+ # the queue it will return false.
28
+ def offer(element, timeout = nil)
29
+ @queue.offer(element, timeout || @timeout, java.util.concurrent.TimeUnit::SECONDS)
30
+ end
31
+
32
+ def take
33
+ @queue.take
34
+ end
35
+ end
36
+ end
@@ -57,6 +57,13 @@ module Lumberjack module Beats
57
57
  end
58
58
  end # def initialize
59
59
 
60
+ # Server#run method, allow the library to manage all the connection
61
+ # threads, this handling is quite minimal and doesn't handle
62
+ # all the possible disconnection/connection cases.
63
+ #
64
+ # To have a more granular control over the connection you should manage
65
+ # them yourself, see the Server#accept method which returns a Connection
66
+ # instance.
60
67
  def run(&block)
61
68
  while !closed?
62
69
  connection = accept
@@ -67,7 +74,14 @@ module Lumberjack module Beats
67
74
  next unless connection
68
75
 
69
76
  Thread.new(connection) do |connection|
70
- connection.run(&block)
77
+ begin
78
+ connection.run(&block)
79
+ rescue Lumberjack::Beats::Connection::ConnectionClosed
80
+ # Connection will raise a wrapped exception upstream,
81
+ # but if the threads are managed by the library we can simply ignore it.
82
+ #
83
+ # Note: This follows the previous behavior of perfect silence.
84
+ end
71
85
  end
72
86
  end
73
87
  end # def run
@@ -132,6 +146,7 @@ module Lumberjack module Beats
132
146
  PROTOCOL_VERSION_2 = "2".ord
133
147
 
134
148
  SUPPORTED_PROTOCOLS = [PROTOCOL_VERSION_1, PROTOCOL_VERSION_2]
149
+ class UnsupportedProtocol < StandardError; end
135
150
 
136
151
  def initialize
137
152
  @buffer_offset = 0
@@ -222,7 +237,7 @@ module Lumberjack module Beats
222
237
  if supported_protocol?(version)
223
238
  yield :version, version
224
239
  else
225
- raise "unsupported protocol #{version}"
240
+ raise UnsupportedProtocol, "unsupported protocol #{version}"
226
241
  end
227
242
  end
228
243
 
@@ -298,9 +313,37 @@ module Lumberjack module Beats
298
313
  end # class Parser
299
314
 
300
315
  class Connection
316
+ # Wrap the original exception into a common one,
317
+ # to make upstream managing and reporting easier
318
+ # But let's make sure we keep the meaning of the original exception.
319
+ class ConnectionClosed < StandardError
320
+ attr_reader :original_exception
321
+
322
+ def initialize(original_exception)
323
+ super(original_exception)
324
+
325
+ @original_exception = original_exception
326
+ set_backtrace(original_exception.backtrace) if original_exception
327
+ end
328
+
329
+ def to_s
330
+ "#{self.class.name} wrapping: #{original_exception.class.name}, #{super.to_s}"
331
+ end
332
+ end
333
+
301
334
  READ_SIZE = 16384
335
+ PEER_INFORMATION_NOT_AVAILABLE = "<PEER INFORMATION NOT AVAILABLE>"
336
+ RESCUED_CONNECTION_EXCEPTIONS = [
337
+ EOFError,
338
+ OpenSSL::SSL::SSLError,
339
+ IOError,
340
+ Errno::ECONNRESET,
341
+ Errno::EPIPE,
342
+ Lumberjack::Beats::Parser::UnsupportedProtocol
343
+ ]
302
344
 
303
345
  attr_accessor :server
346
+ attr_reader :peer
304
347
 
305
348
  def initialize(fd, server)
306
349
  @parser = Parser.new
@@ -308,23 +351,27 @@ module Lumberjack module Beats
308
351
 
309
352
  @server = server
310
353
  @ack_handler = nil
311
- end
312
-
313
- def peer
314
- "#{@fd.peeraddr[3]}:#{@fd.peeraddr[1]}"
354
+
355
+ # Fetch the details of the host before reading anything from the socket
356
+ # so we can use that information when debugging connection issues with
357
+ # remote hosts.
358
+ begin
359
+ @peer = "#{@fd.peeraddr[3]}:#{@fd.peeraddr[1]}"
360
+ rescue IOError
361
+ # This can happen if the connection is dropped or closed before
362
+ # fetching the host details, let's return a generic string.
363
+ @peer = PEER_INFORMATION_NOT_AVAILABLE
364
+ end
315
365
  end
316
366
 
317
367
  def run(&block)
318
368
  while !server.closed?
319
369
  read_socket(&block)
320
370
  end
321
- rescue EOFError,
322
- OpenSSL::SSL::SSLError,
323
- IOError,
324
- Errno::ECONNRESET,
325
- Errno::EPIPE
371
+ rescue *RESCUED_CONNECTION_EXCEPTIONS => e
326
372
  # EOF or other read errors, only action is to shutdown which we'll do in
327
373
  # 'ensure'
374
+ raise ConnectionClosed.new(e)
328
375
  rescue
329
376
  # when the server is shutting down we can safely ignore any exceptions
330
377
  # On windows, we can get a `SystemCallErr`
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "logstash-input-beats"
3
- s.version = "2.0.3"
3
+ s.version = "2.1.1"
4
4
  s.licenses = ["Apache License (2.0)"]
5
5
  s.summary = "Receive events using the lumberjack protocol."
6
6
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
10
10
  s.require_paths = ["lib"]
11
11
 
12
12
  # Files
13
- s.files = Dir["lib/**/*","spec/**/*","vendor/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT"]
13
+ s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT"]
14
14
 
15
15
  # Tests
16
16
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
@@ -23,7 +23,8 @@ Gem::Specification.new do |s|
23
23
 
24
24
  s.add_runtime_dependency "logstash-codec-plain"
25
25
  s.add_runtime_dependency "concurrent-ruby", "~> 0.9.2"
26
- s.add_runtime_dependency "logstash-codec-multiline", "~> 2.0.3"
26
+ s.add_runtime_dependency "thread_safe", "~> 0.3.5"
27
+ s.add_runtime_dependency "logstash-codec-multiline", "~> 2.0.5"
27
28
 
28
29
  s.add_development_dependency "flores", "~>0.0.6"
29
30
  s.add_development_dependency "rspec"
@@ -70,140 +70,49 @@ describe LogStash::Inputs::Beats do
70
70
  end
71
71
  end
72
72
 
73
- describe "#processing of events" do
74
- subject(:beats) { LogStash::Inputs::Beats.new(config) }
75
- let(:codec) { LogStash::Codecs::Multiline.new("pattern" => '\n', "what" => "previous") }
73
+ context "#handle_new_connection" do
74
+ let(:config) {{ "ssl" => false, "port" => 0, "type" => "example", "tags" => "beats" }}
75
+ let(:plugin) { LogStash::Inputs::Beats.new(config) }
76
+ let(:connection) { DummyConnection.new(events) }
77
+ let(:buffer_queue) { DummyNeverBlockedQueue.new }
78
+ let(:pipeline_queue) { [] }
79
+ let(:events) {
80
+ [
81
+ { :map => { "id" => 1 }, :identity_stream => "/var/log/message" },
82
+ { :map => { "id" => 2 }, :identity_stream => "/var/log/message_2" }
83
+ ]
84
+ }
85
+
86
+ before :each do
87
+ plugin.register
88
+
89
+ # Even if we don't mock the actual socket work
90
+ # we have to call run because it will correctly set up the queues
91
+ # instance variables
92
+ t = Thread.new do
93
+ plugin.run(pipeline_queue)
94
+ end
76
95
 
77
- let(:config) do
78
- { "port" => port, "ssl_certificate" => certificate.ssl_cert, "ssl_key" => certificate.ssl_key,
79
- "type" => "example", "codec" => codec }
96
+ sleep(0.1) until t.status == "run"
80
97
  end
81
98
 
82
- before do
83
- beats.register
99
+ after :each do
100
+ plugin.stop
84
101
  end
85
102
 
86
- context "#create_event" do
87
- let(:config) { super.merge({ "add_field" => { "foo" => "bar", "[@metadata][hidden]" => "secret"}, "tags" => ["bonjour"]}) }
88
- let(:event_map) { { "hello" => "world" } }
89
- let(:codec) { LogStash::Codecs::Plain.new }
90
- let(:identity_stream) { "custom-type-input_type-source" }
91
-
92
- context "without a `target_field` defined" do
93
- it "decorates the event" do
94
- beats.create_event(event_map, identity_stream) do |event|
95
- expect(event["foo"]).to eq("bar")
96
- expect(event["[@metadata][hidden]"]).to eq("secret")
97
- expect(event["tags"]).to include("bonjour")
98
- end
99
- end
100
- end
101
-
102
- context "with a `target_field` defined" do
103
- let(:event_map) { super.merge({"message" => "with a field"}) }
104
-
105
- it "decorates the event" do
106
- beats.create_event(event_map, identity_stream) do |event|
107
- expect(event["foo"]).to eq("bar")
108
- expect(event["[@metadata][hidden]"]).to eq("secret")
109
- expect(event["tags"]).to include("bonjour")
110
- end
111
- end
112
- end
113
-
114
- context "when data is buffered in the codec" do
115
- let(:codec) { LogStash::Codecs::Multiline.new("pattern" => '^\s', "what" => "previous") }
116
- let(:event_map) { {"message" => "hello?", "tags" => ["syslog"]} }
117
-
118
- it "returns nil" do
119
- expect { |b| beats.create_event(event_map, identity_stream, &b) }.not_to yield_control
120
- end
121
- end
122
-
123
- context "multiline" do
124
- let(:codec) { LogStash::Codecs::Multiline.new("pattern" => '^2015', "what" => "previous", "negate" => true) }
125
- let(:events_map) do
126
- [
127
- { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "2015-11-10 10:14:38,907 line 1" },
128
- { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "line 1.1" },
129
- { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "2015-11-10 10:16:38,907 line 2" },
130
- { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "line 2.1" },
131
- { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "line 2.2" },
132
- { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "line 2.3" },
133
- { "beat" => { "id" => "main", "resource_id" => "md5"}, "message" => "2015-11-10 10:18:38,907 line 3" }
134
- ]
135
- end
136
-
137
- let(:queue) { [] }
138
- before do
139
- Thread.new { beats.run(queue) }
140
- sleep(0.1)
141
- end
142
-
143
- it "should correctly merge multiple events" do
144
- events_map.each { |map| beats.create_event(map, identity_stream) { |e| queue << e } }
145
- # This cannot currently work without explicitely call a flush
146
- # the flush is never timebased, if no new data is coming in we wont flush the buffer
147
- # https://github.com/logstash-plugins/logstash-codec-multiline/issues/11
148
- beats.stop
149
- expect(queue.size).to eq(3)
150
-
151
- expect(queue.collect { |e| e["message"] }).to include("2015-11-10 10:14:38,907 line 1\nline 1.1",
152
- "2015-11-10 10:16:38,907 line 2\nline 2.1\nline 2.2\nline 2.3",
153
- "2015-11-10 10:18:38,907 line 3")
154
- end
155
- end
156
-
157
- context "with a beat.hostname field" do
158
- let(:event_map) { {"message" => "hello", "beat" => {"hostname" => "linux01"} } }
159
-
160
- it "copies it to the host field" do
161
- beats.create_event(event_map, identity_stream) do |event|
162
- expect(event["host"]).to eq("linux01")
163
- end
164
- end
103
+ context "when an exception occur" do
104
+ let(:connection_handler) { LogStash::Inputs::BeatsSupport::ConnectionHandler.new(connection, plugin, buffer_queue) }
105
+ before do
106
+ expect(LogStash::Inputs::BeatsSupport::ConnectionHandler).to receive(:new).with(any_args).and_return(connection_handler)
165
107
  end
166
108
 
167
- context "with a beat.hostname field but without the message" do
168
- let(:event_map) { {"beat" => {"hostname" => "linux01"} } }
109
+ it "calls flush on the handler and tag the events" do
110
+ expect(connection_handler).to receive(:accept) { raise LogStash::Inputs::Beats::InsertingToQueueTakeTooLong }
111
+ expect(connection_handler).to receive(:flush).and_yield(LogStash::Event.new)
112
+ plugin.handle_new_connection(connection)
169
113
 
170
- it "copies it to the host field" do
171
- beats.create_event(event_map, identity_stream) do |event|
172
- expect(event["host"]).to eq("linux01")
173
- end
174
- end
175
- end
176
-
177
- context "without a beat.hostname field" do
178
- let(:event_map) { {"message" => "hello", "beat" => {"name" => "linux01"} } }
179
-
180
- it "should not add a host field" do
181
- beats.create_event(event_map, identity_stream) do |event|
182
- expect(event["beat"]["name"]).to eq("linux01")
183
- expect(event["host"]).to be_nil
184
- end
185
- end
186
- end
187
-
188
- context "with a beat.hostname and host fields" do
189
- let(:event_map) { {"message" => "hello", "host" => "linux02", "beat" => {"hostname" => "linux01"} } }
190
-
191
- it "should not overwrite host" do
192
- beats.create_event(event_map, identity_stream) do |event|
193
- expect(event["host"]).to eq("linux02")
194
- end
195
- end
196
- end
197
-
198
- context "with a host field in the message" do
199
- let(:codec) { LogStash::Codecs::JSON.new }
200
- let(:event_map) { {"message" => '{"host": "linux02"}', "beat" => {"hostname" => "linux01"} } }
201
-
202
- it "should take the host from the JSON message" do
203
- beats.create_event(event_map, identity_stream) do
204
- expect(event["host"]).to eq("linux02")
205
- end
206
- end
114
+ event = pipeline_queue.shift
115
+ expect(event["tags"]).to include("beats_input_flushed_by_end_of_connection")
207
116
  end
208
117
  end
209
118
  end