logstash-codec-joinlines 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,301 @@
1
+ # encoding: utf-8
2
+ require "logstash/codecs/base"
3
+ require "logstash/util/charset"
4
+ require "logstash/timestamp"
5
+ require "logstash/codecs/auto_flush"
6
+
7
+ # The joinlines codec will join lines matching specified patterns.
8
+ # It is based on the multiline codec, but offers the opportunity to
9
+ # specify a list of patterns, whats and negates. The lists must be
10
+ # of equal length.
11
+ #
12
+ # IMPORTANT: If you are using a Logstash input plugin that supports multiple
13
+ # hosts, such as the <<plugins-inputs-beats>> input plugin, you should not use
14
+ # the joinlines codec to handle multiline events. Doing so may result in the
15
+ # mixing of streams and corrupted event data. In this situation, you need to
16
+ # handle multiline events before sending the event data to Logstash.
17
+ #
18
+ # Example usage
19
+ # [source,ruby]
20
+ # input {
21
+ # stdin {
22
+ # codec => joinlines {
23
+ # patterns => [ "^The following message", "^\s*at" ]
24
+ # what => [ "next", "previous" ]
25
+ # negate => [ false, false ]
26
+ # }
27
+ # }
28
+ # }
29
+ #
30
+ # The example above will join lines starting with "The following message"
31
+ # with the next line, and stack traces with the previous line.
32
+ #
33
module LogStash module Codecs class Joinlines < LogStash::Codecs::Base

  # Separator used when merging buffered lines into one event message.
  # Referenced by the specs as LogStash::Codecs::Joinlines::NL; it was
  # previously undefined, so merge_events raised a NameError.
  NL = "\n".freeze

  # The codec name
  config_name "joinlines"

  # The grok patterns to recognize, one per join rule
  config :patterns, :validate => :string, :list => true, :required => true

  # For each pattern: does a matching line belong to the "previous" or
  # the "next" event?
  config :what, :validate => ["previous", "next"], :list => true, :required => true

  # For each pattern: negate the match?
  config :negate, :validate => :boolean, :list => true, :required => true

  # Logstash ships by default with a bunch of patterns, so you don't
  # necessarily need to define this yourself unless you are adding additional
  # patterns.
  #
  # Pattern files are plain text with format:
  # [source,ruby]
  #     NAME PATTERN
  #
  # For example:
  # [source,ruby]
  #     NUMBER \d+
  config :patterns_dir, :validate => :array, :default => []

  # The character encoding used in this input. Examples include `UTF-8`
  # and `cp1252`.
  #
  # This setting is useful if your log files are in `Latin-1` (aka `cp1252`)
  # or in another character set other than `UTF-8`.
  #
  # This only affects "plain" format logs since JSON is `UTF-8` already.
  config :charset, :validate => ::Encoding.name_list, :default => "UTF-8"

  # Tag multiline events with a given tag. This tag will only be added
  # to events that actually have multiple lines in them. An empty string
  # disables tagging.
  config :multiline_tag, :validate => :string, :default => "joinlines"

  # The accumulation of events can make logstash exit with an out of memory
  # error if event boundaries are not correctly defined. This setting makes
  # sure to flush multiline events after reaching a number of lines; it is
  # used in combination with max_bytes.
  config :max_lines, :validate => :number, :default => 500

  # The accumulation of events can make logstash exit with an out of memory
  # error if event boundaries are not correctly defined. This setting makes
  # sure to flush multiline events after reaching a number of bytes; it is
  # used in combination with max_lines.
  config :max_bytes, :validate => :bytes, :default => "10 MiB"

  # The accumulation of multiple lines will be converted to an event when
  # either a matching new line is seen or there has been no new data appended
  # for this many seconds. No default. If unset, no auto_flush. Units: seconds.
  config :auto_flush_interval, :validate => :number

  public

  # Compiles one grok per configured pattern, resolves the per-pattern
  # handler method, and prepares the line buffer, charset converter and
  # (optional) auto-flush runner.
  #
  # Raises LogStash::ConfigurationError when the patterns/what/negate lists
  # have different lengths; previously a mismatch surfaced later as an
  # obscure NoMethodError from `zip` producing nils.
  def register
    require "grok-pure" # rubygem 'jls-grok'
    require 'logstash/patterns/core'

    # The documented contract is that all three lists are of equal length.
    if @what.size != @patterns.size || @negate.size != @patterns.size
      raise LogStash::ConfigurationError,
            "joinlines: 'patterns', 'what' and 'negate' must be lists of equal length"
    end

    # What the most recently matched rule asked for: "previous", "next",
    # or "" when the last line matched no rule.
    @matching = ""

    # Detect if we are running from a jarfile, pick the right path.
    patterns_path = []
    patterns_path += [LogStash::Patterns::Core.path]

    @patterns_dir = patterns_path.to_a + @patterns_dir
    @groks = []
    @handlers = []

    @patterns.zip(@what).each do |pattern, what|
      grok = Grok.new

      @patterns_dir.each do |path|
        path = ::File.join(path, "*") if ::File.directory?(path)

        Dir.glob(path).each do |file|
          @logger.debug("Grok loading patterns from file", :path => file)
          grok.add_patterns_from_file(file)
        end
      end

      grok.compile(pattern)
      handler = method("do_#{what}".to_sym)

      @groks.push(grok)
      @handlers.push(handler)
    end

    @logger.trace("Registered joinlines plugin", :type => @type, :config => @config)
    reset_buffer

    @converter = LogStash::Util::Charset.new(@charset)
    @converter.logger = @logger

    if @auto_flush_interval
      # will start on first decode
      @auto_flush_runner = AutoFlush.new(self, @auto_flush_interval)
    end
  end # def register

  # Switch auto-flush control over to an externally driven (mapper) runner.
  # No-op when auto_flush_interval is not configured.
  def use_mapper_auto_flush
    return unless auto_flush_active?
    @auto_flush_runner = AutoFlushUnset.new(nil, nil)
    @auto_flush_interval = @auto_flush_interval.to_f
  end

  # Entry point for inputs that supply a listener holding upstream state
  # (e.g. the originating path). Joined events are routed to the listener
  # that owns their first line.
  def accept(listener)
    # memoize references to listener that holds upstream state
    @previous_listener = @last_seen_listener || listener
    @last_seen_listener = listener

    internal_decode(listener.data) do |event, what|
      what_based_listener(what).process_event(event)
    end
  end

  # [pattern, what, negate, grok, handler] tuples, one per configured rule.
  def zip_config
    @patterns.zip(@what, @negate, @groks, @handlers)
  end

  # Conceptually private, but left callable so specs can drive it directly.
  # Splits text into lines, matches each line against the configured rules
  # in order (first match wins), and yields (event, what) whenever a
  # completed event is flushed from the buffer.
  def internal_decode(text, &block)
    do_flush = false
    text = @converter.convert(text)
    text.split("\n").each do |line|
      matched = false
      zip_config.each do |pattern, what, negate, grok, _handler|
        match = grok.match(line)
        @logger.debug("Joinlines", :pattern => pattern, :text => line,
                      :match => (match != false), :negate => negate)

        # Apply the negate option: a "match" is either a grok hit without
        # negate, or a grok miss with negate.
        match = (match && !negate) || (!match && negate)

        if match
          # A "next"-rule hit starts a new event unless we were already
          # accumulating for "next".
          do_flush = (what == "next" && @matching != "next")
          matched = true
          @matching = what
          break
        end
      end

      if !matched
        # An unmatched line closes the pending event unless a "next" rule
        # claimed it for the current line.
        do_flush = (@matching != "next")
        @matching = ""
      end

      if do_flush
        flush do |event|
          yield(event, @matching)
        end
        do_flush = false
      end

      auto_flush_runner.start
      buffer(line)
    end
  end

  public

  # Standard codec decode: like internal_decode but callers only receive
  # the event (the "what" routing information is dropped).
  def decode(text, &block)
    internal_decode(text) do |event, what|
      yield(event)
    end
  end # def decode

  # Append a line to the pending event buffer, tracking byte size for the
  # max_bytes limit.
  def buffer(text)
    @buffer_bytes += text.bytesize
    @buffer.push(text)
  end

  # Merge the buffered lines into one event and yield it. The buffer is
  # only cleared when the block completes without raising, so data is
  # retained for a retry on downstream failure.
  def flush(&block)
    if block_given? && @buffer.any?
      no_error = true
      events = merge_events
      begin
        yield events
      rescue ::Exception => e
        # need to rescue everything
        # likeliest cause: backpressure or timeout by exception
        # can't really do anything but leave the data in the buffer for next time if there is one
        @logger.error("Joinlines: flush downstream error", :exception => e)
        no_error = false
      end
      reset_buffer if no_error
    end
  end

  # Called by the auto-flush runner when the interval elapses; emits the
  # pending event to the most recently seen listener.
  def auto_flush(listener = @last_seen_listener)
    return if listener.nil?

    flush do |event|
      listener.process_event(event)
    end
  end

  # Build a single event from the buffered lines, tagging it when it spans
  # multiple lines or tripped a size limit.
  def merge_events
    event = LogStash::Event.new(LogStash::Event::TIMESTAMP => @time, "message" => @buffer.join(NL))
    event.tag @multiline_tag if !@multiline_tag.empty? && @buffer.size > 1
    event.tag "joinlines_codec_max_bytes_reached" if over_maximum_bytes?
    event.tag "joinlines_codec_max_lines_reached" if over_maximum_lines?
    event
  end

  # Empty the pending-line buffer and its byte counter.
  def reset_buffer
    @buffer = []
    @buffer_bytes = 0
  end

  # Anything that is not an explicit "next" joins with the previous line.
  def doing_previous?(what)
    what != "next"
  end

  # Route "previous"-joined events to the listener that saw their first
  # line; "next"-joined events belong to the latest listener.
  def what_based_listener(what)
    doing_previous?(what) ? @previous_listener : @last_seen_listener
  end

  # Handler for "next" rules: buffer first, then flush if the line did not
  # match (event complete) or the buffer limits were exceeded.
  def do_next(text, matched, &block)
    buffer(text)
    auto_flush_runner.start
    flush(&block) if !matched || buffer_over_limits?
  end

  # Handler for "previous" rules: flush the pending event first if the line
  # did not match or limits were exceeded, then buffer the new line.
  def do_previous(text, matched, &block)
    flush(&block) if !matched || buffer_over_limits?
    auto_flush_runner.start
    buffer(text)
  end

  def over_maximum_lines?
    @buffer.size > @max_lines
  end

  def over_maximum_bytes?
    @buffer_bytes >= @max_bytes
  end

  def buffer_over_limits?
    over_maximum_lines? || over_maximum_bytes?
  end

  # Encoding is pass-through for this codec.
  def encode(event)
    # Nothing to do.
    @on_event.call(event, event)
  end # def encode

  def close
    auto_flush_runner.stop
  end

  def auto_flush_active?
    !@auto_flush_interval.nil?
  end

  # The configured runner, or an inert placeholder when auto-flush is off
  # (so callers can invoke start/stop unconditionally).
  def auto_flush_runner
    @auto_flush_runner || AutoFlushUnset.new(nil, nil)
  end

  # Cloned codecs (one per input stream) need their own grokkers, buffer
  # and runner, so re-register on copy.
  def initialize_copy(source)
    super
    register
  end

end end end # class LogStash::Codecs::Joinlines
@@ -0,0 +1,81 @@
1
+ require "concurrent"
2
+
3
module LogStash module Codecs class RetriggerableTask
  # Countdown granularity: the delay is converted into a number of
  # SLEEP_FOR-sized ticks consumed by the worker thread.
  SLEEP_FOR = 0.25.freeze

  attr_reader :thread

  # delay    - seconds of inactivity before the listener is notified
  # listener - any object responding to #timeout
  def initialize(delay, listener)
    @count     = calculate_count(delay)
    @listener  = listener
    @counter   = Concurrent::AtomicFixnum.new(@count)
    @stopped   = Concurrent::AtomicBoolean.new(false)
    @semaphore = Concurrent::Semaphore.new(1)
  end

  # (Re)arm the timer. If the countdown is still running it is simply
  # reset; if the timeout callback is currently in flight we wait on the
  # semaphore (released by the worker thread after the callback) before
  # starting a fresh countdown.
  def retrigger
    return if stopped?
    @semaphore.acquire if executing?

    pending? ? reset_counter : start
  end

  # Permanently stop the task; the worker thread exits at its next check.
  def close
    @stopped.make_true
  end

  # Remaining ticks on the countdown.
  def counter
    @counter.value
  end

  # True while the worker thread is alive and past its countdown,
  # i.e. the timeout callback phase.
  def executing?
    running? && counter < 1
  end

  # True while the worker thread is alive and still counting down.
  def pending?
    running? && counter > 0
  end

  private

  # in multiples of SLEEP_FOR (0.25) seconds
  # if delay is 10 seconds then count is 40
  # this only works when SLEEP_FOR is less than 1
  def calculate_count(value)
    return 1 if value < SLEEP_FOR
    (value / SLEEP_FOR).floor
  end

  def reset_counter
    @counter.value = @count
  end

  def running?
    @thread && @thread.alive?
  end

  # Spawn the worker thread: tick down the counter, then (unless stopped)
  # fire the listener's timeout with the semaphore held so retrigger can
  # detect and wait out the callback.
  def start
    reset_counter
    @thread = Thread.new do
      until counter < 1 || stopped?
        sleep SLEEP_FOR
        @counter.decrement
      end

      @semaphore.drain_permits
      @listener.timeout unless stopped?
      @semaphore.release
    end
  end

  def stopped?
    @stopped.value
  end
end end end
@@ -0,0 +1,28 @@
1
Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'logstash-codec-joinlines'
  spec.version     = '0.1.0'
  spec.licenses    = ['Apache-2.0']
  spec.summary     = 'Merges multiline messages into a single event, allowing for multiple patterns.'
  spec.description = 'This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program'
  spec.homepage    = 'https://github.com/lovmoen/logstash-codec-joinlines'
  spec.authors     = ['Svein L. Ellingsen (lovmoen)']
  spec.email       = 'lovmoen@gmail.com'

  # Packaged files and tests
  spec.require_paths = ['lib']
  spec.files         = Dir['lib/**/*', 'spec/**/*', 'vendor/**/*', '*.gemspec', '*.md', 'CONTRIBUTORS', 'Gemfile', 'LICENSE', 'NOTICE.TXT']
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  spec.metadata = { "logstash_plugin" => "true", "logstash_group" => "codec" }

  # Runtime dependencies
  spec.add_runtime_dependency 'logstash-core-plugin-api', "~> 2.0"
  spec.add_runtime_dependency 'logstash-codec-line'
  spec.add_runtime_dependency 'logstash-patterns-core'
  spec.add_runtime_dependency 'jls-grok', '~> 0.11.1'

  # Development dependencies
  spec.add_development_dependency 'logstash-devutils'
end
@@ -0,0 +1,435 @@
1
+ # encoding: utf-8
2
+ require "logstash/devutils/rspec/spec_helper"
3
+ require "logstash/codecs/joinlines"
4
+ require "logstash/event"
5
+ require "insist"
6
+ require_relative '../spec_helper'
7
+
8
+ # above helper also defines a subclass of Joinlines
9
+ # called JoinlinesRspec that exposes the internal buffer
10
+ # and a Logger Mock
11
+
12
describe LogStash::Codecs::Joinlines do
  context "#multipatterns" do
    let(:config) { { "patterns" => "", "what" => "next", "negate" => false } }
    let(:codec)  { LogStash::Codecs::Joinlines.new(config).tap(&:register) }
    let(:events) { [] }
    let(:line_producer) do
      lambda do |lines|
        lines.each do |line|
          codec.decode(line) { |event| events << event }
        end
      end
    end

    it "should internally decode lines to (event, what) pairs" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      text = "hello world\n second line\nanother first line\nnext\nowns previous\n"

      events = []
      whats = []
      codec.internal_decode(text) do |event, what|
        events << event
        whats << what
      end

      # The last pending event is only released by an explicit flush.
      codec.flush do |event|
        events << event
        whats << "final" # dummy
      end

      expect(events.size).to eq(3)
      expect(whats.size).to eq(3)
      expect(events[0].get("message")).to eq("hello world\n second line")
      expect(events[1].get("message")).to eq("another first line")
      expect(events[2].get("message")).to eq("next\nowns previous")
    end

    it "should break between consecutive previous and next" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      lines = ["hello world", " second line", "next", "owns previous"]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "hello world\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "next\nowns previous"
      expect(events[1].get("tags")).to include("joinlines")
    end

    it "should stitch together consecutive next and previous" do
      config.update("patterns" => ["^\\s", "next"], "what" => ["previous", "next"], "negate" => [false, false])
      lines = ["next", "owns previous and next", " second line", "another first"]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "next\nowns previous and next\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "another first"
      expect(events[1].get("tags")).to be_nil
    end
  end

  context "#decode" do
    let(:config) { { "patterns" => "", "what" => "next", "negate" => false } }
    let(:codec)  { LogStash::Codecs::Joinlines.new(config).tap(&:register) }
    let(:events) { [] }
    let(:line_producer) do
      lambda do |lines|
        lines.each do |line|
          codec.decode(line) { |event| events << event }
        end
      end
    end

    it "should be able to handle multiline events with additional lines space-indented" do
      config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
      lines = ["hello world", " second line", "another first line"]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq(2)
      expect(events[0].get("message")).to eq "hello world\n second line"
      expect(events[0].get("tags")).to include("joinlines")
      expect(events[1].get("message")).to eq "another first line"
      expect(events[1].get("tags")).to be_nil
    end

    it "should allow custom tag added to multiline events" do
      config.update("patterns" => "^\\s", "what" => "previous", "negate" => false, "multiline_tag" => "hurray")
      lines = ["hello world", " second line", "another first line"]
      line_producer.call(lines)
      codec.flush { |e| events << e }

      expect(events.size).to eq 2
      expect(events[0].get("tags")).to include("hurray")
      expect(events[1].get("tags")).to be_nil
    end

    it "should handle new lines in messages" do
      config.update("patterns" => '\D', "what" => "previous", "negate" => false)
      lineio = StringIO.new("1234567890\nA234567890\nB234567890\n0987654321\n")
      until lineio.eof
        line = lineio.read(256) # when this is set to 36 the tests fail
        codec.decode(line) { |evt| events << evt }
      end
      codec.flush { |e| events << e }
      expect(events[0].get("message")).to eq "1234567890\nA234567890\nB234567890"
      expect(events[1].get("message")).to eq "0987654321"
    end

    it "should allow grok patterns to be used" do
      config.update(
        "patterns" => "^%{NUMBER} %{TIME}",
        "negate" => true,
        "what" => "previous"
      )

      lines = ["120913 12:04:33 first line", "second line", "third line"]

      line_producer.call(lines)
      codec.flush { |e| events << e }

      insist { events.size } == 1
      insist { events.first.get("message") } == lines.join("\n")
    end

    context "using default UTF-8 charset" do
      it "should decode valid UTF-8 input" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        lines = ["foobar", "κόσμε"]
        lines.each do |line|
          expect(line.encoding.name).to eq "UTF-8"
          expect(line.valid_encoding?).to be_truthy
          codec.decode(line) { |event| events << event }
        end

        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(lines).each do |tuple|
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end

      it "should escape invalid sequences" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        lines = ["foo \xED\xB9\x81\xC3", "bar \xAD"]
        lines.each do |line|
          expect(line.encoding.name).to eq "UTF-8"
          expect(line.valid_encoding?).to eq false

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(lines).each do |tuple|
          expect(tuple[0].get("message")).to eq tuple[1].inspect[1..-2]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end

      it "decodes and joins multiple patterns" do
        config.update("patterns" => ["^\\s", "^the following"], "what" => ["previous", "next"], "negate" => [false, false])
        lines = ["hello world", " second line", "another first line", "the following message belongs to next", "I own the previous", "Another first"]

        lines.each do |line|
          codec.decode(line) { |event| events << event }
        end

        codec.flush { |e| events << e }

        # expect(events.size).to eq(4)
        expect(events[0].get("message")).to eq "hello world\n second line"
        expect(events[0].get("tags")).to include("joinlines")
        expect(events[1].get("message")).to eq "another first line"
        expect(events[1].get("tags")).to be_nil
        expect(events[2].get("message")).to eq "the following message belongs to next\nI own the previous"
        expect(events[2].get("tags")).to include("joinlines")
        expect(events[3].get("message")).to eq "Another first"
        expect(events[3].get("tags")).to be_nil
      end
    end

    context "with valid non UTF-8 source encoding" do
      it "should encode to UTF-8" do
        config.update("charset" => "ISO-8859-1", "patterns" => "^\\s", "what" => "previous", "negate" => false)
        samples = [
          ["foobar", "foobar"],
          ["\xE0 Montr\xE9al", "à Montréal"],
        ]

        samples.map { |(a, b)| a.force_encoding("ISO-8859-1") }.each do |line|
          expect(line.encoding.name).to eq "ISO-8859-1"
          expect(line.valid_encoding?).to eq true

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(samples.map { |(a, b)| b }).each do |tuple|
          expect(tuple[1].encoding.name).to eq "UTF-8"
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end
    end

    context "with invalid non UTF-8 source encoding" do
      it "should encode to UTF-8" do
        config.update("charset" => "ASCII-8BIT", "patterns" => "^\\s", "what" => "previous", "negate" => false)
        samples = [
          ["\xE0 Montr\xE9al", "� Montr�al"],
          ["\xCE\xBA\xCF\x8C\xCF\x83\xCE\xBC\xCE\xB5", "����������"],
        ]
        events = []
        samples.map { |(a, b)| a.force_encoding("ASCII-8BIT") }.each do |line|
          expect(line.encoding.name).to eq "ASCII-8BIT"
          expect(line.valid_encoding?).to eq true

          codec.decode(line) { |event| events << event }
        end
        codec.flush { |e| events << e }
        expect(events.size).to eq 2

        events.zip(samples.map { |(a, b)| b }).each do |tuple|
          expect(tuple[1].encoding.name).to eq "UTF-8"
          expect(tuple[0].get("message")).to eq tuple[1]
          expect(tuple[0].get("message").encoding.name).to eq "UTF-8"
        end
      end
    end
  end

  context "with non closed multiline events" do
    let(:random_number_of_events) { rand(300..1000) }
    let(:sample_event) { "- Sample event" }
    let(:events) { decode_events }
    let(:unmerged_events_count) { events.collect { |event| event.get("message").split(LogStash::Codecs::Joinlines::NL).size }.inject(&:+) }

    context "break on maximum_lines" do
      let(:max_lines) { rand(10..100) }
      let(:options) do
        {
          "patterns" => "^-",
          "what" => "previous",
          "negate" => false,
          "max_lines" => max_lines,
          "max_bytes" => "2 mb"
        }
      end

      it "flushes on a maximum lines" do
        expect(unmerged_events_count).to eq(random_number_of_events)
      end

      it "tags the event" do
        expect(events.first.get("tags")).to include("joinlines_codec_max_lines_reached")
      end
    end

    context "break on maximum bytes" do
      let(:max_bytes) { rand(30..100) }
      let(:options) do
        {
          "patterns" => "^-",
          "what" => "previous",
          "negate" => false,
          "max_lines" => 20000,
          "max_bytes" => max_bytes
        }
      end

      it "flushes on a maximum bytes size" do
        expect(unmerged_events_count).to eq(random_number_of_events)
      end

      it "tags the event" do
        expect(events.first.get("tags")).to include("joinlines_codec_max_bytes_reached")
      end
    end
  end

  describe "auto flushing" do
    let(:config) { { "patterns" => "", "what" => "next", "negate" => false } }
    let(:events) { [] }
    let(:lines) do
      { "en.log" => ["hello world", " second line", " third line"],
        "fr.log" => ["Salut le Monde", " deuxième ligne", " troisième ligne"],
        "de.log" => ["Hallo Welt"] }
    end
    let(:listener_class) { Jlc::LineListener }
    let(:auto_flush_interval) { 2 }

    let(:line_producer) do
      lambda do |path|
        # create a listener that holds upstream state
        listener = listener_class.new(events, codec, path)
        lines[path].each do |data|
          listener.accept(data)
        end
      end
    end

    let(:codec) do
      Jlc::JoinlinesRspec.new(config).tap(&:register)
    end

    before :each do
      expect(LogStash::Codecs::Joinlines).to receive(:logger).and_return(Jlc::JoinlinesLogTracer.new).at_least(:once)
    end

    context "when auto_flush_interval is not set" do
      it "does not build any events" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false)
        line_producer.call("en.log")
        sleep auto_flush_interval + 0.1
        expect(events.size).to eq(0)
        expect(codec.buffer_size).to eq(3)
      end
    end

    context "when the auto_flush raises an exception" do
      let(:errmsg) { "OMG, Daleks!" }
      let(:listener_class) { Jlc::LineErrorListener }

      it "does not build any events, logs an error and the buffer data remains" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false,
                      "auto_flush_interval" => auto_flush_interval)
        line_producer.call("en.log")
        sleep(auto_flush_interval + 0.2)
        msg, args = codec.logger.trace_for(:error)
        expect(msg).to eq("Joinlines: flush downstream error")
        expect(args[:exception].message).to eq(errmsg)
        expect(events.size).to eq(0)
        expect(codec.buffer_size).to eq(3)
      end
    end

    def assert_produced_events(key, sleeping)
      line_producer.call(key)
      sleep(sleeping)
      yield
      # expect(codec).to have_an_empty_buffer
    end

    context "mode: previous, when there are pauses between multiline file writes" do
      it "auto-flushes events from the accumulated lines to the queue" do
        config.update("patterns" => "^\\s", "what" => "previous", "negate" => false,
                      "auto_flush_interval" => auto_flush_interval)

        assert_produced_events("en.log", auto_flush_interval + 0.1) do
          expect(events[0]).to match_path_and_line("en.log", lines["en.log"])
        end

        line_producer.call("fr.log")
        # next line(s) come before auto-flush i.e. assert its buffered
        sleep(auto_flush_interval - 0.3)
        expect(codec.buffer_size).to eq(3)
        expect(events.size).to eq(1)

        assert_produced_events("de.log", auto_flush_interval + 0.1) do
          # now the events are generated
          expect(events[1]).to match_path_and_line("fr.log", lines["fr.log"])
          expect(events[2]).to match_path_and_line("de.log", lines["de.log"])
        end
      end
    end

    context "mode: next, when there are pauses between multiline file writes" do
      let(:lines) do
        { "en.log" => ["hello world++", "second line++", "third line"],
          "fr.log" => ["Salut le Monde++", "deuxième ligne++", "troisième ligne"],
          "de.log" => ["Hallo Welt"] }
      end

      it "auto-flushes events from the accumulated lines to the queue" do
        config.update("patterns" => "\\+\\+$", "what" => "next", "negate" => false,
                      "auto_flush_interval" => auto_flush_interval)

        assert_produced_events("en.log", auto_flush_interval + 0.1) do
          # wait for auto_flush
          expect(events[0]).to match_path_and_line("en.log", lines["en.log"])
        end

        expect(codec).to have_an_empty_buffer

        assert_produced_events("de.log", auto_flush_interval - 0.3) do
          # this file is read before auto-flush, thus last event is not flushed yet
          # This differs from logstash-codec-multiline because of not emitting
          # last received event even if not matched
          expect(events.size).to eq(1)
        end

        codec.flush { |event| events << event } # flushing here releases the event
        expect(events.size).to eq(2)
        expect(events[1]).to match_path_and_line(nil, lines["de.log"]) # but path is not set when emitted by flush
        expect(codec).to have_an_empty_buffer

        assert_produced_events("fr.log", auto_flush_interval + 0.1) do
          # wait for auto_flush
          expect(events[2]).to match_path_and_line("fr.log", lines["fr.log"])
        end
      end
    end
  end
end