logstash-codec-joinlines 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,301 @@
1
+ # encoding: utf-8
2
+ require "logstash/codecs/base"
3
+ require "logstash/util/charset"
4
+ require "logstash/timestamp"
5
+ require "logstash/codecs/auto_flush"
6
+
7
+ # The joinlines codec will join lines mathcing specified patterns.
8
+ # It is based on the multiline codec, but offers the opportunity to
9
+ # specify a list of patterns, whats and negates. The lists must be
10
+ # of equal length.
11
+ #
12
+ # IMPORTANT: If you are using a Logstash input plugin that supports multiple
13
+ # hosts, such as the <<plugins-inputs-beats>> input plugin, you should not use
14
+ # the joinlines codec to handle multiline events. Doing so may result in the
15
+ # mixing of streams and corrupted event data. In this situation, you need to
16
+ # handle multiline events before sending the event data to Logstash.
17
+ #
18
+ # Example usage
19
+ # [source,ruby]
20
+ # input {
21
+ # stdin {
22
+ # codec => joinlines {
23
+ # patterns => [ "^The following message", "^\s*at" ]
24
+ # what => [ "next", "previous" ]
25
+ # negate => [ false, false ]
26
+ # }
27
+ # }
28
+ # }
29
+ #
30
+ # The example above will join lines starting with "The following message"
31
+ # with the next line, and stack traces with the previous line.
32
+ #
33
module LogStash module Codecs class Joinlines < LogStash::Codecs::Base

  # The codec name
  config_name "joinlines"

  # Newline separator used when merging buffered lines into a single event
  # message. Defined here because merge_events (and the specs, via
  # LogStash::Codecs::Joinlines::NL) reference it; previously the constant
  # was missing and the first flush raised NameError.
  NL = "\n".freeze

  # The grok patterns to recognize.
  config :patterns, :validate => :string, :list => true, :required => true

  # For each pattern: whether a matching line is joined with the "previous"
  # or the "next" line. Must have the same length as `patterns`.
  config :what, :validate => ["previous", "next"], :list => true, :required => true

  # For each pattern: negate the match? Must have the same length as `patterns`.
  config :negate, :validate => :boolean, :list => true, :required => true

  # Logstash ships by default with a bunch of patterns, so you don't
  # necessarily need to define this yourself unless you are adding additional
  # patterns.
  #
  # Pattern files are plain text with format:
  # [source,ruby]
  #     NAME PATTERN
  #
  # For example:
  # [source,ruby]
  #     NUMBER \d+
  config :patterns_dir, :validate => :array, :default => []

  # The character encoding used in this input. Examples include `UTF-8`
  # and `cp1252`
  #
  # This setting is useful if your log files are in `Latin-1` (aka `cp1252`)
  # or in another character set other than `UTF-8`.
  #
  # This only affects "plain" format logs since JSON is `UTF-8` already.
  config :charset, :validate => ::Encoding.name_list, :default => "UTF-8"

  # Tag multiline events with a given tag. This tag will only be added
  # to events that actually have multiple lines in them.
  config :multiline_tag, :validate => :string, :default => "joinlines"

  # The accumulation of events can make logstash exit with an out of memory
  # error if event boundaries are not correctly defined. This setting makes
  # sure to flush multiline events after reaching a number of lines; it is
  # used in combination with max_bytes.
  config :max_lines, :validate => :number, :default => 500

  # The accumulation of events can make logstash exit with an out of memory
  # error if event boundaries are not correctly defined. This setting makes
  # sure to flush multiline events after reaching a number of bytes; it is
  # used in combination with max_lines.
  config :max_bytes, :validate => :bytes, :default => "10 MiB"

  # The accumulation of multiple lines will be converted to an event when either a
  # matching new line is seen or there has been no new data appended for this many
  # seconds. No default. If unset, no auto_flush. Units: seconds
  config :auto_flush_interval, :validate => :number

  public
  def register
    require "grok-pure" # rubygem 'jls-grok'
    require 'logstash/patterns/core'

    # The three list settings are zipped together below; a length mismatch
    # would otherwise surface later as a confusing NameError (missing
    # "do_" method) or silently flip negate handling, so fail fast with a
    # clear message instead.
    unless @patterns.size == @what.size && @patterns.size == @negate.size
      raise LogStash::ConfigurationError,
            "joinlines: 'patterns', 'what' and 'negate' must have the same number of entries"
    end

    # "what" mode ("previous"/"next") of the most recently matched pattern;
    # the empty string means no pattern is currently matching.
    @matching = ""

    # Detect if we are running from a jarfile, pick the right path.
    patterns_path = []
    patterns_path += [LogStash::Patterns::Core.path]

    @patterns_dir = patterns_path.to_a + @patterns_dir
    @groks = []
    @handlers = []

    @patterns.zip(@what).each do |pattern, what|
      grok = Grok.new

      @patterns_dir.each do |path|
        if ::File.directory?(path)
          path = ::File.join(path, "*")
        end

        Dir.glob(path).each do |file|
          @logger.debug("Grok loading patterns from file", :path => file)
          grok.add_patterns_from_file(file)
        end
      end

      grok.compile(pattern)
      # do_previous / do_next, kept index-aligned with @groks.
      handler = method("do_#{what}".to_sym)

      @groks.push(grok)
      @handlers.push(handler)
    end

    @logger.trace("Registered joinlines plugin", :type => @type, :config => @config)
    reset_buffer

    @converter = LogStash::Util::Charset.new(@charset)
    @converter.logger = @logger

    if @auto_flush_interval
      # will start on first decode
      @auto_flush_runner = AutoFlush.new(self, @auto_flush_interval)
    end
  end # def register

  # Used by inputs that drive auto-flush themselves: replaces the internal
  # runner with a no-op placeholder and normalizes the interval to a Float.
  def use_mapper_auto_flush
    return unless auto_flush_active?
    @auto_flush_runner = AutoFlushUnset.new(nil, nil)
    @auto_flush_interval = @auto_flush_interval.to_f
  end

  # Entry point for inputs that supply a listener object holding upstream
  # state (e.g. the file path). Events joined in "previous" mode are routed
  # to the listener that produced the first buffered line.
  def accept(listener)
    # memoize references to listener that holds upstream state
    @previous_listener = @last_seen_listener || listener
    @last_seen_listener = listener

    internal_decode(listener.data) do |event, what|
      what_based_listener(what).process_event(event)
    end
  end

  # One row per configured pattern: [pattern, what, negate, grok, handler].
  def zip_config
    @patterns.zip(@what, @negate, @groks, @handlers)
  end

  # Deliberately not private: exercised directly by the specs.
  # Splits text into lines, matches each line against every configured
  # pattern (first match wins), and yields (event, what) pairs whenever a
  # completed event is flushed from the buffer.
  def internal_decode(text, &block)
    do_flush = false
    text = @converter.convert(text)
    text.split("\n").each do |line|
      matched = false
      zip_config.each do |pattern, what, negate, grok, handler|
        match = grok.match(line)
        @logger.debug("Joinlines", :pattern => pattern, :text => line,
                      :match => (match != false), :negate => negate)

        # Add negate option
        match = (match and !negate) || (!match and negate)

        if match
          # A "next"-joining line starts a new event unless we were already
          # accumulating in "next" mode.
          do_flush = (what == "next" and @matching != "next")
          matched = true
          @matching = what
          break
        end
      end

      if !matched
        # An unmatched line terminates a "previous" run; in "next" mode it
        # is owned by the buffered lines and flushed together with them.
        do_flush = (@matching != "next")
        @matching = ""
      end

      if do_flush
        flush do |event|
          yield(event, @matching)
        end
        do_flush = false
      end

      auto_flush_runner.start
      buffer(line)
    end
  end

  public
  # Standard codec entry point; drops the "what" detail that
  # internal_decode yields alongside each event.
  def decode(text, &block)
    internal_decode(text) do |event, what|
      yield(event)
    end
  end # def decode

  # Append a line to the pending-event buffer, tracking byte size for the
  # max_bytes limit.
  def buffer(text)
    @buffer_bytes += text.bytesize
    @buffer.push(text)
  end

  # Merge the buffered lines into one event and hand it to the block. The
  # buffer is cleared only when the block returns without raising, so data
  # survives downstream errors and can be re-flushed later.
  def flush(&block)
    if block_given? && @buffer.any?
      no_error = true
      events = merge_events
      begin
        yield events
      rescue ::Exception => e
        # need to rescue everything
        # likeliest cause: backpressure or timeout by exception
        # can't really do anything but leave the data in the buffer for next time if there is one
        @logger.error("Joinlines: flush downstream error", :exception => e)
        no_error = false
      end
      reset_buffer if no_error
    end
  end

  # Timer callback: flush pending lines to the most recent listener.
  def auto_flush(listener = @last_seen_listener)
    return if listener.nil?

    flush do |event|
      listener.process_event(event)
    end
  end

  # Build a single event from the buffered lines, tagging multi-line joins
  # and limit overruns.
  def merge_events
    event = LogStash::Event.new(LogStash::Event::TIMESTAMP => @time, "message" => @buffer.join(NL))
    event.tag @multiline_tag if !@multiline_tag.empty? && @buffer.size > 1
    event.tag "joinlines_codec_max_bytes_reached" if over_maximum_bytes?
    event.tag "joinlines_codec_max_lines_reached" if over_maximum_lines?
    event
  end

  def reset_buffer
    @buffer = []
    @buffer_bytes = 0
  end

  # Anything that is not explicitly "next" (including "") is treated as
  # previous-joining.
  def doing_previous?(what)
    what != "next"
  end

  def what_based_listener(what)
    doing_previous?(what) ? @previous_listener : @last_seen_listener
  end

  # "next"-mode handler (registered in @handlers; kept for API parity with
  # the multiline codec).
  def do_next(text, matched, &block)
    buffer(text)
    auto_flush_runner.start
    flush(&block) if !matched || buffer_over_limits?
  end

  # "previous"-mode handler (registered in @handlers; kept for API parity
  # with the multiline codec).
  def do_previous(text, matched, &block)
    flush(&block) if !matched || buffer_over_limits?
    auto_flush_runner.start
    buffer(text)
  end

  def over_maximum_lines?
    @buffer.size > @max_lines
  end

  def over_maximum_bytes?
    @buffer_bytes >= @max_bytes
  end

  def buffer_over_limits?
    over_maximum_lines? || over_maximum_bytes?
  end

  def encode(event)
    # Nothing to do.
    @on_event.call(event, event)
  end # def encode

  def close
    auto_flush_runner.stop
  end

  def auto_flush_active?
    !@auto_flush_interval.nil?
  end

  # The active auto-flush timer, or a no-op stand-in when auto flushing is
  # disabled.
  def auto_flush_runner
    @auto_flush_runner || AutoFlushUnset.new(nil, nil)
  end

  # dup/clone must not share buffers or grok state with the source codec.
  def initialize_copy(source)
    super
    register
  end

end end end # class LogStash::Codecs::Joinlines
@@ -0,0 +1,81 @@
1
+ require "concurrent"
2
+
3
module LogStash module Codecs class RetriggerableTask
  # Polling granularity in seconds: a delay is implemented as `count`
  # consecutive sleeps of this length (see calculate_count).
  SLEEP_FOR = 0.25.freeze

  # The countdown worker thread, if one has been started (for tests/diagnostics).
  attr_reader :thread

  # delay    - seconds of inactivity before @listener.timeout is invoked
  # listener - any object responding to #timeout
  def initialize(delay, listener)
    @count = calculate_count(delay)
    @listener = listener
    # remaining ticks; reset on retrigger, decremented by the worker thread
    @counter = Concurrent::AtomicFixnum.new(0 + @count)
    @stopped = Concurrent::AtomicBoolean.new(false)
    # single-permit semaphore: drained while the timeout callback runs so
    # retrigger can block until the callback has completed
    @semaphore = Concurrent::Semaphore.new(1)
  end

  # Restart the countdown. If the timeout callback is currently executing,
  # block until it finishes (acquire waits for the drained permit to be
  # released), then either reset the running countdown or start a new one.
  def retrigger
    return if stopped?
    if executing?
      @semaphore.acquire
    end

    if pending?
      reset_counter
    else
      start
    end
  end

  # Permanently stop the task; the worker thread exits at its next tick and
  # the listener is no longer notified.
  def close
    @stopped.make_true
  end

  # Remaining ticks before the timeout fires.
  def counter
    @counter.value
  end

  # True while the worker thread is inside the timeout callback (countdown
  # exhausted but thread still alive).
  def executing?
    running? && counter < 1
  end

  # True while the worker thread is still counting down.
  def pending?
    running? && counter > 0
  end

  private

  def calculate_count(value)
    # in multiples of SLEEP_FOR (0.25) seconds
    # if delay is 10 seconds then count is 40
    # this only works when SLEEP_FOR is less than 1
    return 1 if value < SLEEP_FOR
    (value / SLEEP_FOR).floor
  end

  def reset_counter
    @counter.value = 0 + @count
  end

  def running?
    @thread && @thread.alive?
  end

  # Spawn the countdown thread: sleep in SLEEP_FOR ticks until the counter
  # reaches zero (or the task is stopped), then notify the listener while
  # holding the drained semaphore so retrigger callers wait for completion.
  def start()
    reset_counter
    @thread = Thread.new do
      while counter > 0
        break if stopped?
        sleep SLEEP_FOR
        @counter.decrement
      end

      @semaphore.drain_permits
      @listener.timeout if !stopped?
      @semaphore.release
    end
  end

  def stopped?
    @stopped.value
  end
end end end
@@ -0,0 +1,28 @@
1
# Gem specification for the logstash-codec-joinlines plugin.
Gem::Specification.new do |s|
  s.name = 'logstash-codec-joinlines'
  s.version = '0.1.0'
  s.licenses = ['Apache-2.0']
  s.summary = 'Merges multiline messages into a single event, allowing for multiple patterns.'
  s.description = 'This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program'
  s.homepage = 'https://github.com/lovmoen/logstash-codec-joinlines'
  s.authors = ['Svein L. Ellingsen (lovmoen)']
  s.email = 'lovmoen@gmail.com'
  s.require_paths = ['lib']

  # Files
  s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "codec" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash-core-plugin-api', "~> 2.0"
  s.add_runtime_dependency 'logstash-codec-line'

  # grok matching support for the configured patterns
  s.add_runtime_dependency 'logstash-patterns-core'
  s.add_runtime_dependency 'jls-grok', '~> 0.11.1'

  s.add_development_dependency 'logstash-devutils'
end
@@ -0,0 +1,435 @@
1
+ # encoding: utf-8
2
+ require "logstash/devutils/rspec/spec_helper"
3
+ require "logstash/codecs/joinlines"
4
+ require "logstash/event"
5
+ require "insist"
6
+ require_relative '../spec_helper'
7
+
8
+ # above helper also defines a subclass of Joinlines
9
+ # called JoinlinesRspec that exposes the internal buffer
10
+ # and a Logger Mock
11
+
12
describe LogStash::Codecs::Joinlines do
  # Exercises the joinlines-specific feature: multiple pattern/what/negate
  # triplets applied to the same stream.
  context "#multipatterns" do
    let(:config) { {"patterns" => "", "what" => "next", "negate" => false} }
    let(:codec) { LogStash::Codecs::Joinlines.new(config).tap {|c| c.register } }
    let(:events) { [] }
    # Feeds lines one at a time through codec.decode, collecting events.
    let(:line_producer) do
      lambda do |lines|
        lines.each do |line|
          codec.decode(line) do |event|
            events << event
          end
        end
      end
    end

    it "should internally decode lines to (event, what) pairs" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      text = "hello world\n second line\nanother first line\nnext\nowns previous\n"

      events = []
      whats = []
      codec.internal_decode(text) do |event,what|
        events.push(event)
        whats.push(what)
      end

      # Must flush to get last event
      codec.flush do |event|
        events.push(event)
        whats.push("final") # dummy
      end

      expect(events.size).to eq(3)
      expect(whats.size).to eq(3)
      expect(events[0].get("message")).to eq("hello world\n second line")
      expect(events[1].get("message")).to eq("another first line")
      expect(events[2].get("message")).to eq("next\nowns previous")
    end

    it "should break between consecutive previous and next" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      lines = [ "hello world", " second line", "next", "owns previous" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "hello world\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "next\nowns previous"
      expect(events[1].get("tags")).to include("joinlines")
    end

    it "should stitch together consecutive next and previous" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      lines = [ "next", "owns previous and next", " second line", "another first" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "next\nowns previous and next\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "another first"
      expect(events[1].get("tags")).to be_nil
    end
  end

  # Single-pattern behavior, mirroring the logstash-codec-multiline specs.
  context "#decode" do
    let(:config) { {"patterns" => "", "what" => "next", "negate" => false} }
    let(:codec) { LogStash::Codecs::Joinlines.new(config).tap {|c| c.register } }
    let(:events) { [] }
    let(:line_producer) do
      lambda do |lines|
        lines.each do |line|
          codec.decode(line) do |event|
            events << event
          end
        end
      end
    end

    it "should be able to handle multiline events with additional lines space-indented" do
      config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
      lines = [ "hello world", " second line", "another first line" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "hello world\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "another first line"
      expect(events[1].get("tags")).to be_nil
    end

    it "should allow custom tag added to multiline events" do
      config.update("patterns" => "^\\s", "what" => "previous", "negate" => false, "multiline_tag" => "hurray")
      lines = [ "hello world", " second line", "another first line" ]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq 2
      expect(events[0].get("tags")).to include("hurray")
      expect(events[1].get("tags")).to be_nil
    end

    it "should handle new lines in messages" do
      config.update("patterns" => '\D', "what" => "previous", "negate" => false)
      lineio = StringIO.new("1234567890\nA234567890\nB234567890\n0987654321\n")
      until lineio.eof
        line = lineio.read(256) #when this is set to 36 the tests fail
        codec.decode(line) {|evt| events.push(evt)}
      end
      codec.flush { |e| events << e }
      expect(events[0].get("message")).to eq "1234567890\nA234567890\nB234567890"
      expect(events[1].get("message")).to eq "0987654321"
    end

    it "should allow grok patterns to be used" do
      config.update(
        "patterns" => "^%{NUMBER} %{TIME}",
        "negate" => true,
        "what" => "previous"
      )

      lines = [ "120913 12:04:33 first line", "second line", "third line" ]

      line_producer.call(lines)
      codec.flush { |e| events << e }

      insist { events.size } == 1
      insist { events.first.get("message") } == lines.join("\n")
    end

    context "using default UTF-8 charset" do

      it "should decode valid UTF-8 input" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        lines = [ "foobar", "κόσμε" ]
        lines.each do |line|
          expect(line.encoding.name).to eq "UTF-8"
          expect(line.valid_encoding?).to be_truthy
          codec.decode(line) { |event| events << event }
        end

        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(lines).each do |tuple|
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end

      it "should escape invalid sequences" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        lines = [ "foo \xED\xB9\x81\xC3", "bar \xAD" ]
        lines.each do |line|
          expect(line.encoding.name).to eq "UTF-8"
          expect(line.valid_encoding?).to eq false

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(lines).each do |tuple|
          expect(tuple[0].get("message")).to eq tuple[1].inspect[1..-2]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end

      it "decodes and joins multiple patterns" do
        config.update("patterns" => [ "^\\s", "^the following" ], "what" => [ "previous", "next" ], "negate" => [ false, false] )
        lines = [ "hello world", " second line", "another first line", "the following message belongs to next", "I own the previous", "Another first" ]

        lines.each do |line|
          codec.decode(line) do |event|
            events << event
          end
        end

        codec.flush { |e| events << e }

        #expect(events.size).to eq(4)
        expect(events[0].get("message")).to eq "hello world\n second line"
        expect(events[0].get("tags")).to include("joinlines")
        expect(events[1].get("message")).to eq "another first line"
        expect(events[1].get("tags")).to be_nil
        expect(events[2].get("message")).to eq "the following message belongs to next\nI own the previous"
        expect(events[2].get("tags")).to include("joinlines")
        expect(events[3].get("message")).to eq "Another first"
        expect(events[3].get("tags")).to be_nil
      end
    end


    context "with valid non UTF-8 source encoding" do

      it "should encode to UTF-8" do
        config.update("charset" => "ISO-8859-1", "patterns" => "^\\s", "what" => "previous", "negate" => false)
        samples = [
          ["foobar", "foobar"],
          ["\xE0 Montr\xE9al", "à Montréal"],
        ]

        # lines = [ "foo \xED\xB9\x81\xC3", "bar \xAD" ]
        samples.map{|(a, b)| a.force_encoding("ISO-8859-1")}.each do |line|
          expect(line.encoding.name).to eq "ISO-8859-1"
          expect(line.valid_encoding?).to eq true

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(samples.map{|(a, b)| b}).each do |tuple|
          expect(tuple[1].encoding.name).to eq "UTF-8"
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end
    end

    context "with invalid non UTF-8 source encoding" do

      it "should encode to UTF-8" do
        config.update("charset" => "ASCII-8BIT", "patterns" => "^\\s", "what" => "previous", "negate" => false)
        samples = [
          ["\xE0 Montr\xE9al", "� Montr�al"],
          ["\xCE\xBA\xCF\x8C\xCF\x83\xCE\xBC\xCE\xB5", "����������"],
        ]
        events = []
        samples.map{|(a, b)| a.force_encoding("ASCII-8BIT")}.each do |line|
          expect(line.encoding.name).to eq "ASCII-8BIT"
          expect(line.valid_encoding?).to eq true

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(samples.map{|(a, b)| b}).each do |tuple|
          expect(tuple[1].encoding.name).to eq "UTF-8"
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end

    end
  end

  # max_lines / max_bytes safety limits: events must be force-flushed and
  # tagged when a boundary is never seen.
  context "with non closed multiline events" do
    let(:random_number_of_events) { rand(300..1000) }
    let(:sample_event) { "- Sample event" }
    let(:events) { decode_events }
    # Total line count across all flushed events; NL is the codec's join separator.
    let(:unmerged_events_count) { events.collect { |event| event.get("message").split(LogStash::Codecs::Joinlines::NL).size }.inject(&:+) }

    context "break on maximum_lines" do
      let(:max_lines) { rand(10..100) }
      let(:options) {
        {
          "patterns" => "^-",
          "what" => "previous",
          "negate" => false,
          "max_lines" => max_lines,
          "max_bytes" => "2 mb"
        }
      }

      it "flushes on a maximum lines" do
        expect(unmerged_events_count).to eq(random_number_of_events)
      end

      it "tags the event" do
        expect(events.first.get("tags")).to include("joinlines_codec_max_lines_reached")
      end
    end

    context "break on maximum bytes" do
      let(:max_bytes) { rand(30..100) }
      let(:options) {
        {
          "patterns" => "^-",
          "what" => "previous",
          "negate" => false,
          "max_lines" => 20000,
          "max_bytes" => max_bytes
        }
      }

      it "flushes on a maximum bytes size" do
        expect(unmerged_events_count).to eq(random_number_of_events)
      end

      it "tags the event" do
        expect(events.first.get("tags")).to include("joinlines_codec_max_bytes_reached")
      end
    end
  end

  # Timer-driven flushing via the auto_flush_interval setting; uses the
  # listener/log-tracer doubles from spec_helper (Jlc namespace).
  describe "auto flushing" do
    let(:config) { {"patterns" => "", "what" => "next", "negate" => false} }
    let(:events) { [] }
    let(:lines) do
      { "en.log" => ["hello world", " second line", " third line"],
        "fr.log" => ["Salut le Monde", " deuxième ligne", " troisième ligne"],
        "de.log" => ["Hallo Welt"] }
    end
    let(:listener_class) { Jlc::LineListener }
    let(:auto_flush_interval) { 2 }

    let(:line_producer) do
      lambda do |path|
        #create a listener that holds upstream state
        listener = listener_class.new(events, codec, path)
        lines[path].each do |data|
          listener.accept(data)
        end
      end
    end

    let(:codec) do
      Jlc::JoinlinesRspec.new(config).tap {|c| c.register}
    end

    before :each do
      expect(LogStash::Codecs::Joinlines).to receive(:logger).and_return(Jlc::JoinlinesLogTracer.new).at_least(:once)
    end

    context "when auto_flush_interval is not set" do
      it "does not build any events" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        line_producer.call("en.log")
        sleep auto_flush_interval + 0.1
        expect(events.size).to eq(0)
        expect(codec.buffer_size).to eq(3)
      end
    end

    context "when the auto_flush raises an exception" do
      let(:errmsg) { "OMG, Daleks!" }
      let(:listener_class) { Jlc::LineErrorListener }

      it "does not build any events, logs an error and the buffer data remains" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false,
          "auto_flush_interval" => auto_flush_interval)
        line_producer.call("en.log")
        sleep(auto_flush_interval + 0.2)
        msg, args = codec.logger.trace_for(:error)
        expect(msg).to eq("Joinlines: flush downstream error")
        expect(args[:exception].message).to eq(errmsg)
        expect(events.size).to eq(0)
        expect(codec.buffer_size).to eq(3)
      end
    end

    # Helper: feed one file's lines, wait, then run the given assertions.
    def assert_produced_events(key, sleeping)
      line_producer.call(key)
      sleep(sleeping)
      yield
      #expect(codec).to have_an_empty_buffer
    end

    context "mode: previous, when there are pauses between multiline file writes" do
      it "auto-flushes events from the accumulated lines to the queue" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false,
          "auto_flush_interval" => auto_flush_interval)

        assert_produced_events("en.log", auto_flush_interval + 0.1) do
          expect(events[0]).to match_path_and_line("en.log", lines["en.log"])
        end

        line_producer.call("fr.log")
        #next line(s) come before auto-flush i.e. assert its buffered
        sleep(auto_flush_interval - 0.3)
        expect(codec.buffer_size).to eq(3)
        expect(events.size).to eq(1)

        assert_produced_events("de.log", auto_flush_interval + 0.1) do
          # now the events are generated
          expect(events[1]).to match_path_and_line("fr.log", lines["fr.log"])
          expect(events[2]).to match_path_and_line("de.log", lines["de.log"])
        end
      end
    end

    context "mode: next, when there are pauses between multiline file writes" do

      let(:lines) do
        { "en.log" => ["hello world++", "second line++", "third line"],
          "fr.log" => ["Salut le Monde++", "deuxième ligne++", "troisième ligne"],
          "de.log" => ["Hallo Welt"] }
      end

      it "auto-flushes events from the accumulated lines to the queue" do
        config.update("patterns" => "\\+\\+$", "what" => "next", "negate" => false,
          "auto_flush_interval" => auto_flush_interval)

        assert_produced_events("en.log", auto_flush_interval + 0.1) do
          # wait for auto_flush
          expect(events[0]).to match_path_and_line("en.log", lines["en.log"])
        end

        expect(codec).to have_an_empty_buffer

        assert_produced_events("de.log", auto_flush_interval - 0.3) do
          # this file is read before auto-flush, thus last event is not flushed yet
          # This differs from logstash-codec-multiline because of not emitting
          # last received event even if not matched
          expect(events.size).to eq(1)
        end

        codec.flush { |event| events << event } # flushing here releases the event
        expect(events.size).to eq(2)
        expect(events[1]).to match_path_and_line(nil, lines["de.log"]) # but path is not set when emitted by flush
        expect(codec).to have_an_empty_buffer

        assert_produced_events("fr.log", auto_flush_interval + 0.1) do
          # wait for auto_flush
          expect(events[2]).to match_path_and_line("fr.log", lines["fr.log"])
        end
      end
    end
  end
end