logstash-codec-multiline 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cd43c9171bdc01db5f0d381b4c95994e7b6073d4
4
- data.tar.gz: 07230a73ec9e7cb9ccddb94921d540323e4d0e86
3
+ metadata.gz: ad1702d90e9bf8fb69ffd334989def88c34b8afa
4
+ data.tar.gz: dc3b95f3bac523a85107eeb5c0142f6687a09b25
5
5
  SHA512:
6
- metadata.gz: 2aec937d4fdf0bbde23c38dee5544198ee8bb5c89efe146bf96b24d4e77bf0f004ba197508168e37f700bd8ebcd913a4719a766b7ce7c7c50ce526b9f9d83623
7
- data.tar.gz: 3174e737bc7922cbafec8cda5f1ee85ec9c1691e6918e5e61ab20da35167b6c9969147a4ff869e52057a8385190edd2058699fbc636bf5e69f3b7ccba8a60983
6
+ metadata.gz: f29daf9944841236fd22a7c4dfe80a3fa7f4ba3efd3e06809cc35d8b8345b7e691aec1695413bcbc1a4f288feceb923ed4798ff1eec8f5ea710b29fbd97915f5
7
+ data.tar.gz: 3ba39d672418818e50deafc4f24c9d6ca9196eada5b88ca884c406eafcfdb0e4d6c9650418071623d2a229e675346ac8d0f8205f902a6b298c3f8e70e727ab2c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 2.0.5
2
+ - Add auto_flush_interval config option, with no default. If not set, no auto-flush is done.
3
+ - Add evict method to identity_map_codec that allows for an input, when done with an identity, to auto_flush and remove the identity from the map.
4
+
1
5
  ## 2.0.4
2
6
  - Add constructional method to allow an eviction specific block to be set.
3
7
 
@@ -0,0 +1,66 @@
1
+ # encoding: utf-8
2
+ require "concurrent"
3
+
4
+ module LogStash module Codecs class AutoFlush
5
+ def initialize(mc, interval)
6
+ @mc, @interval = mc, interval
7
+ @stopped = Concurrent::AtomicBoolean.new # false by default
8
+ end
9
+
10
+ def start
11
+ # can't start if pipeline is stopping
12
+ return self if stopped?
13
+ if pending?
14
+ @task.reset
15
+ elsif finished?
16
+ @task = Concurrent::ScheduledTask.execute(@interval) do
17
+ @mc.auto_flush()
18
+ end
19
+ # else the task is executing
20
+ end
21
+ self
22
+ end
23
+
24
+ def finished?
25
+ return true if @task.nil?
26
+ @task.fulfilled?
27
+ end
28
+
29
+ def pending?
30
+ @task && @task.pending?
31
+ end
32
+
33
+ def stopped?
34
+ @stopped.value
35
+ end
36
+
37
+ def stop
38
+ @stopped.make_true
39
+ @task.cancel if pending?
40
+ end
41
+ end
42
+
43
+ class AutoFlushUnset
44
+ def initialize(mc, interval)
45
+ end
46
+
47
+ def pending?
48
+ false
49
+ end
50
+
51
+ def stopped?
52
+ true
53
+ end
54
+
55
+ def start
56
+ self
57
+ end
58
+
59
+ def finished?
60
+ true
61
+ end
62
+
63
+ def stop
64
+ self
65
+ end
66
+ end end end
@@ -63,7 +63,7 @@ module LogStash module Codecs class IdentityMapCodec
63
63
  def stop
64
64
  return if !running?
65
65
  @running = false
66
- @thread.wakeup
66
+ @thread.wakeup if @thread.alive?
67
67
  end
68
68
  end
69
69
 
@@ -136,6 +136,17 @@ module LogStash module Codecs class IdentityMapCodec
136
136
  # end Constructional/builder methods
137
137
  # ==============================================
138
138
 
139
+ # ==============================================
140
+ # IdentityMapCodec API
141
+ def evict(identity)
142
+ # maybe called more than once
143
+ if (compo = identity_map.delete(identity))
144
+ compo.codec.auto_flush if compo.codec.respond_to?(:auto_flush)
145
+ end
146
+ end
147
+ # end IdentityMapCodec API
148
+ # ==============================================
149
+
139
150
  # ==============================================
140
151
  # Codec API
141
152
  def decode(data, identity = nil, &block)
@@ -143,22 +154,29 @@ module LogStash module Codecs class IdentityMapCodec
143
154
  stream_codec(identity).decode(data, &block)
144
155
  end
145
156
 
157
+ def accept(listener)
158
+ stream_codec(listener.path).accept(listener)
159
+ end
160
+
146
161
  alias_method :<<, :decode
147
162
 
148
163
  def encode(event, identity = nil)
149
164
  stream_codec(identity).encode(event)
150
165
  end
151
166
 
152
- # this method will not be called from
153
- # the input or the pipeline unless
154
- # we implement codec flush on shutdown
155
- # problematic, because we may not have
156
- # received all the multiline parts yet.
157
- # but if we don't flush we will lose data
158
167
  def flush(&block)
159
168
  all_codecs.each do |codec|
160
169
  #let ruby do its default args thing
161
- block.nil? ? codec.flush : codec.flush(&block)
170
+ if block_given?
171
+ codec.flush(&block)
172
+ else
173
+ if codec.respond_to?(:auto_flush)
174
+ codec.auto_flush
175
+ else
176
+ #try this, no guarantees
177
+ codec.flush
178
+ end
179
+ end
162
180
  end
163
181
  end
164
182
 
@@ -191,13 +209,24 @@ module LogStash module Codecs class IdentityMapCodec
191
209
  # contents should not mutate during this call
192
210
  identity_map.delete_if do |identity, compo|
193
211
  if (flag = compo.timeout <= cut_off)
194
- compo.codec.flush(&(@eviction_block || @decode_block))
212
+ evict_flush(compo.codec)
195
213
  end
196
214
  flag
197
215
  end
198
216
  current_size_and_limit
199
217
  end
200
218
 
219
+ def evict_flush(codec)
220
+ if codec.respond_to?(:auto_flush)
221
+ codec.auto_flush
222
+ else
223
+ if (block = @eviction_block || @decode_block)
224
+ codec.flush(&block)
225
+ end
226
+ # all else - can't do anything
227
+ end
228
+ end
229
+
201
230
  def current_size_and_limit
202
231
  [identity_count, max_limit]
203
232
  end
@@ -2,6 +2,7 @@
2
2
  require "logstash/codecs/base"
3
3
  require "logstash/util/charset"
4
4
  require "logstash/timestamp"
5
+ require "logstash/codecs/auto_flush"
5
6
 
6
7
  # The multiline codec will collapse multiline messages and merge them into a
7
8
  # single event.
@@ -76,7 +77,7 @@ require "logstash/timestamp"
76
77
  # This says that any line ending with a backslash should be combined with the
77
78
  # following line.
78
79
  #
79
- class LogStash::Codecs::Multiline < LogStash::Codecs::Base
80
+ module LogStash module Codecs class Multiline < LogStash::Codecs::Base
80
81
  config_name "multiline"
81
82
 
82
83
  # The regular expression to match.
@@ -126,7 +127,13 @@ class LogStash::Codecs::Multiline < LogStash::Codecs::Base
126
127
  # max_lines.
127
128
  config :max_bytes, :validate => :bytes, :default => "10 MiB"
128
129
 
130
+ # The accumulation of multiple lines will be converted to an event when either a
131
+ # matching new line is seen or no new data has been appended for
132
+ # auto_flush_interval seconds. No default. If unset, no auto-flush is done.
133
+ config :auto_flush_interval, :validate => :number
134
+
129
135
  public
136
+
130
137
  def register
131
138
  require "grok-pure" # rubygem 'jls-grok'
132
139
  require 'logstash/patterns/core'
@@ -139,8 +146,8 @@ class LogStash::Codecs::Multiline < LogStash::Codecs::Base
139
146
 
140
147
  @patterns_dir = patterns_path.to_a + @patterns_dir
141
148
  @patterns_dir.each do |path|
142
- if File.directory?(path)
143
- path = File.join(path, "*")
149
+ if ::File.directory?(path)
150
+ path = ::File.join(path, "*")
144
151
  end
145
152
 
146
153
  Dir.glob(path).each do |file|
@@ -158,11 +165,23 @@ class LogStash::Codecs::Multiline < LogStash::Codecs::Base
158
165
 
159
166
  @converter = LogStash::Util::Charset.new(@charset)
160
167
  @converter.logger = @logger
168
+ if @auto_flush_interval
169
+ # will start on first decode
170
+ @auto_flush_runner = AutoFlush.new(self, @auto_flush_interval)
171
+ end
161
172
  end # def register
162
173
 
174
+ def accept(listener)
175
+ # memoize references to listener that holds upstream state
176
+ @previous_listener = @last_seen_listener || listener
177
+ @last_seen_listener = listener
178
+ decode(listener.data) do |event|
179
+ what_based_listener.process_event(event)
180
+ end
181
+ end
182
+
163
183
  def decode(text, &block)
164
184
  text = @converter.convert(text)
165
-
166
185
  text.split("\n").each do |line|
167
186
  match = @grok.match(line)
168
187
  @logger.debug("Multiline", :pattern => @pattern, :text => line,
@@ -175,23 +194,41 @@ class LogStash::Codecs::Multiline < LogStash::Codecs::Base
175
194
  end # def decode
176
195
 
177
196
  def buffer(text)
178
- @time = LogStash::Timestamp.now if @buffer.empty?
179
197
  @buffer_bytes += text.bytesize
180
- @buffer << text
198
+ @buffer.push(text).tap do |b|
199
+ # do start but preserve the return value
200
+ auto_flush_runner.start
201
+ end
181
202
  end
182
203
 
183
204
  def flush(&block)
184
- if @buffer.any?
185
- yield merge_events
186
- reset_buffer
205
+ if block_given? && @buffer.any?
206
+ no_error = true
207
+ events = merge_events
208
+ begin
209
+ yield events
210
+ rescue ::Exception => e
211
+ # need to rescue everything
212
+ # likeliest cause: backpressure or timeout by exception
213
+ # can't really do anything but leave the data in the buffer for next time if there is one
214
+ @logger.error("Multiline: flush downstream error", :exception => e)
215
+ no_error = false
216
+ end
217
+ reset_buffer if no_error
218
+ end
219
+ end
220
+
221
+ def auto_flush
222
+ flush do |event|
223
+ @last_seen_listener.process_event(event)
187
224
  end
188
225
  end
189
226
 
190
227
  def merge_events
191
228
  event = LogStash::Event.new(LogStash::Event::TIMESTAMP => @time, "message" => @buffer.join(NL))
192
229
  event.tag @multiline_tag if @multiline_tag && @buffer.size > 1
193
- event.tag "multiline_codec_max_bytes_reached" if over_maximun_bytes?
194
- event.tag "multiline_codec_max_lines_reached" if over_maximun_lines?
230
+ event.tag "multiline_codec_max_bytes_reached" if over_maximum_bytes?
231
+ event.tag "multiline_codec_max_lines_reached" if over_maximum_lines?
195
232
  event
196
233
  end
197
234
 
@@ -200,6 +237,14 @@ class LogStash::Codecs::Multiline < LogStash::Codecs::Base
200
237
  @buffer_bytes = 0
201
238
  end
202
239
 
240
+ def doing_previous?
241
+ @what == "previous"
242
+ end
243
+
244
+ def what_based_listener
245
+ doing_previous? ? @previous_listener : @last_seen_listener
246
+ end
247
+
203
248
  def do_next(text, matched, &block)
204
249
  buffer(text)
205
250
  flush(&block) if !matched || buffer_over_limits?
@@ -210,16 +255,16 @@ class LogStash::Codecs::Multiline < LogStash::Codecs::Base
210
255
  buffer(text)
211
256
  end
212
257
 
213
- def over_maximun_lines?
258
+ def over_maximum_lines?
214
259
  @buffer.size > @max_lines
215
260
  end
216
261
 
217
- def over_maximun_bytes?
262
+ def over_maximum_bytes?
218
263
  @buffer_bytes >= @max_bytes
219
264
  end
220
265
 
221
266
  def buffer_over_limits?
222
- over_maximun_lines? || over_maximun_bytes?
267
+ over_maximum_lines? || over_maximum_bytes?
223
268
  end
224
269
 
225
270
  def encode(event)
@@ -227,4 +272,19 @@ class LogStash::Codecs::Multiline < LogStash::Codecs::Base
227
272
  @on_event.call(event, event)
228
273
  end # def encode
229
274
 
230
- end # class LogStash::Codecs::Multiline
275
+ def close
276
+ if auto_flush_runner.pending?
277
+ #will cancel task if necessary
278
+ auto_flush_runner.stop
279
+ end
280
+ auto_flush
281
+ end
282
+
283
+ def auto_flush_active?
284
+ !@auto_flush_interval.nil?
285
+ end
286
+
287
+ def auto_flush_runner
288
+ @auto_flush_runner || AutoFlushUnset.new(nil, nil)
289
+ end
290
+ end end end # class LogStash::Codecs::Multiline
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-codec-multiline'
4
- s.version = '2.0.4'
4
+ s.version = '2.0.5'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "The multiline codec will collapse multiline messages and merge them into a single event."
7
7
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -0,0 +1,118 @@
1
+ # encoding: utf-8
2
+ require "logstash/codecs/auto_flush"
3
+ require "logstash/codecs/multiline"
4
+ require_relative "../supports/helpers.rb"
5
+
6
+ describe "AutoFlush and AutoFlushUnset" do
7
+ let(:flushable) { AutoFlushTracer.new }
8
+ let(:flush_wait) { 0.1 }
9
+
10
+ describe LogStash::Codecs::AutoFlush do
11
+ subject { described_class.new(flushable, flush_wait) }
12
+
13
+ context "when initialized" do
14
+ it "#pending? is false" do
15
+ expect(subject.pending?).to be_falsy
16
+ end
17
+
18
+ it "#stopped? is false" do
19
+ expect(subject.stopped?).to be_falsy
20
+ end
21
+
22
+ it "#finished? is true" do
23
+ expect(subject.finished?).to be_truthy
24
+ end
25
+ end
26
+
27
+ context "when started" do
28
+ let(:flush_wait) { 20 }
29
+
30
+ before { subject.start }
31
+ after { subject.stop }
32
+
33
+ it "#pending? is true" do
34
+ expect(subject.pending?).to be_truthy
35
+ end
36
+
37
+ it "#stopped? is false" do
38
+ expect(subject.stopped?).to be_falsy
39
+ end
40
+
41
+ it "#finished? is false" do
42
+ expect(subject.finished?).to be_falsy
43
+ end
44
+ end
45
+
46
+ context "when finished" do
47
+ before do
48
+ subject.start
49
+ sleep flush_wait + 0.1
50
+ end
51
+
52
+ after { subject.stop }
53
+
54
+ it "calls auto_flush on flushable" do
55
+ expect(flushable.trace_for(:auto_flush)).to be_truthy
56
+ end
57
+
58
+ it "#pending? is false" do
59
+ expect(subject.pending?).to be_falsy
60
+ end
61
+
62
+ it "#stopped? is false" do
63
+ expect(subject.stopped?).to be_falsy
64
+ end
65
+
66
+ it "#finished? is true" do
67
+ expect(subject.finished?).to be_truthy
68
+ end
69
+ end
70
+
71
+ context "when stopped" do
72
+ before do
73
+ subject.start
74
+ subject.stop
75
+ end
76
+
77
+ it "does not call auto_flush on flushable" do
78
+ expect(flushable.trace_for(:auto_flush)).to be_falsy
79
+ end
80
+
81
+ it "#pending? is false" do
82
+ expect(subject.pending?).to be_falsy
83
+ end
84
+
85
+ it "#stopped? is true" do
86
+ expect(subject.stopped?).to be_truthy
87
+ end
88
+
89
+ it "#finished? is false" do
90
+ expect(subject.finished?).to be_falsy
91
+ end
92
+ end
93
+ end
94
+
95
+ describe LogStash::Codecs::AutoFlushUnset do
96
+ subject { described_class.new(flushable, 2) }
97
+
98
+ it "#pending? is false" do
99
+ expect(subject.pending?).to be_falsy
100
+ end
101
+
102
+ it "#stopped? is true" do
103
+ expect(subject.stopped?).to be_truthy
104
+ end
105
+
106
+ it "#finished? is true" do
107
+ expect(subject.finished?).to be_truthy
108
+ end
109
+
110
+ it "#start returns self" do
111
+ expect(subject.start).to eq(subject)
112
+ end
113
+
114
+ it "#stop returns self" do
115
+ expect(subject.start).to eq(subject)
116
+ end
117
+ end
118
+ end
@@ -1,32 +1,8 @@
1
1
  # encoding: utf-8
2
2
  require "logstash/devutils/rspec/spec_helper"
3
3
  require "logstash/codecs/identity_map_codec"
4
-
5
- class LogTracer
6
- def initialize() @tracer = []; end
7
- def warn(*args) @tracer.push [:warn, args]; end
8
- def error(*args) @tracer.push [:error, args]; end
9
-
10
- def trace_for(symbol)
11
- params = @tracer.assoc(symbol)
12
- params.nil? ? false : params.last
13
- end
14
- end
15
-
16
- class IdentityMapCodecTracer
17
- def initialize() @tracer = []; end
18
- def clone() self.class.new; end
19
- def decode(data) @tracer.push [:decode, data]; end
20
- def encode(event) @tracer.push [:encode, event]; end
21
- def flush(&block) @tracer.push [:flush, block.call]; end
22
- def close() @tracer.push [:close, true]; end
23
- def logger() @logger ||= LogTracer.new; end
24
-
25
- def trace_for(symbol)
26
- params = @tracer.assoc(symbol)
27
- params.nil? ? false : params.last
28
- end
29
- end
4
+ require "logstash/codecs/multiline"
5
+ require_relative "../supports/helpers.rb"
30
6
 
31
7
  describe LogStash::Codecs::IdentityMapCodec do
32
8
  let(:codec) { IdentityMapCodecTracer.new }
@@ -220,4 +196,49 @@ describe LogStash::Codecs::IdentityMapCodec do
220
196
  end
221
197
  end
222
198
  end
199
+
200
+ describe "observer/listener based processing" do
201
+ let(:listener) { LineListener }
202
+ let(:queue) { [] }
203
+ let(:identity) { "stream1" }
204
+ let(:config) { {"pattern" => "^\\s", "what" => "previous"} }
205
+ let(:mlc) { MultilineRspec.new(config).tap {|c| c.register } }
206
+ let(:imc) { described_class.new(mlc) }
207
+
208
+ before do
209
+ listener = LineListener.new(queue, imc, identity)
210
+ listener.accept("foo")
211
+ end
212
+
213
+ describe "normal processing" do
214
+ context "when wrapped codec has auto-flush deactivated" do
215
+ it "no events are generated (the line is buffered)" do
216
+ expect(imc.identity_count).to eq(1)
217
+ expect(queue.size).to eq(0)
218
+ expect(mlc.internal_buffer[0]).to eq("foo")
219
+ end
220
+ end
221
+
222
+ context "when wrapped codec has auto-flush activated" do
223
+ let(:config) { {"pattern" => "^\\s", "what" => "previous", "auto_flush_interval" => 0.2} }
224
+ it "one event is generated" do
225
+ sleep 0.4
226
+ expect(queue.size).to eq(1)
227
+ expect(queue[0]["message"]).to eq("foo")
228
+ expect(imc.identity_count).to eq(1)
229
+ end
230
+ end
231
+ end
232
+
233
+ describe "evict method" do
234
+ context "when evicting and wrapped codec implements auto-flush" do
235
+ it "flushes and removes the identity" do
236
+ expect(imc.identity_count).to eq(1)
237
+ imc.evict(identity)
238
+ expect(queue[0]["message"]).to eq("foo")
239
+ expect(imc.identity_count).to eq(0)
240
+ end
241
+ end
242
+ end
243
+ end
223
244
  end
@@ -3,57 +3,63 @@ require "logstash/codecs/multiline"
3
3
  require "logstash/event"
4
4
  require "insist"
5
5
  require_relative "../supports/helpers.rb"
6
+ # above helper also defines a subclass of Multiline
7
+ # called MultilineRspec that exposes the internal buffer
8
+ # and a Logger Mock
6
9
 
7
10
  describe LogStash::Codecs::Multiline do
8
11
  context "#decode" do
9
- it "should be able to handle multiline events with additional lines space-indented" do
10
- codec = LogStash::Codecs::Multiline.new("pattern" => "^\\s", "what" => "previous")
11
- lines = [ "hello world", " second line", "another first line" ]
12
- events = []
13
- lines.each do |line|
14
- codec.decode(line) do |event|
15
- events << event
12
+ let(:config) { {} }
13
+ let(:codec) { LogStash::Codecs::Multiline.new(config).tap {|c| c.register } }
14
+ let(:events) { [] }
15
+ let(:line_producer) do
16
+ lambda do |lines|
17
+ lines.each do |line|
18
+ codec.decode(line) do |event|
19
+ events << event
20
+ end
16
21
  end
17
22
  end
23
+ end
24
+
25
+ it "should be able to handle multiline events with additional lines space-indented" do
26
+ config.update("pattern" => "^\\s", "what" => "previous")
27
+ lines = [ "hello world", " second line", "another first line" ]
28
+ line_producer.call(lines)
18
29
  codec.flush { |e| events << e }
19
- insist { events.size } == 2
20
- insist { events[0]["message"] } == "hello world\n second line"
21
- insist { events[0]["tags"] }.include?("multiline")
22
- insist { events[1]["message"] } == "another first line"
23
- insist { events[1]["tags"] }.nil?
30
+
31
+ expect(events.size).to eq(2)
32
+ expect(events[0]["message"]).to eq "hello world\n second line"
33
+ expect(events[0]["tags"]).to include("multiline")
34
+ expect(events[1]["message"]).to eq "another first line"
35
+ expect(events[1]["tags"]).to be_nil
24
36
  end
25
37
 
26
38
  it "should allow custom tag added to multiline events" do
27
- codec = LogStash::Codecs::Multiline.new("pattern" => "^\\s", "what" => "previous", "multiline_tag" => "hurray" )
39
+ config.update("pattern" => "^\\s", "what" => "previous", "multiline_tag" => "hurray")
28
40
  lines = [ "hello world", " second line", "another first line" ]
29
- events = []
30
- lines.each do |line|
31
- codec.decode(line) do |event|
32
- events << event
33
- end
34
- end
41
+ line_producer.call(lines)
35
42
  codec.flush { |e| events << e }
36
- insist { events.size } == 2
37
- insist { events[0]["tags"] }.include?("hurray")
38
- insist { events[1]["tags"] }.nil?
43
+
44
+ expect(events.size).to eq 2
45
+ expect(events[0]["tags"]).to include("hurray")
46
+ expect(events[1]["tags"]).to be_nil
39
47
  end
40
48
 
41
49
  it "should handle new lines in messages" do
42
- codec = LogStash::Codecs::Multiline.new("pattern" => '^\s', "what" => "previous")
43
- line = "one\ntwo\n two.2\nthree\n"
44
- events = []
45
- codec.decode(line) do |event|
46
- events << event
50
+ config.update("pattern" => '\D', "what" => "previous")
51
+ lineio = StringIO.new("1234567890\nA234567890\nB234567890\n0987654321\n")
52
+ until lineio.eof
53
+ line = lineio.read(256) #when this is set to 36 the tests fail
54
+ codec.decode(line) {|evt| events.push(evt)}
47
55
  end
48
56
  codec.flush { |e| events << e }
49
- insist { events.size } == 3
50
- insist { events[0]["message"] } == "one"
51
- insist { events[1]["message"] } == "two\n two.2"
52
- insist { events[2]["message"] } == "three"
57
+ expect(events[0]["message"]).to eq "1234567890\nA234567890\nB234567890"
58
+ expect(events[1]["message"]).to eq "0987654321"
53
59
  end
54
60
 
55
61
  it "should allow grok patterns to be used" do
56
- codec = LogStash::Codecs::Multiline.new(
62
+ config.update(
57
63
  "pattern" => "^%{NUMBER} %{TIME}",
58
64
  "negate" => true,
59
65
  "what" => "previous"
@@ -61,56 +67,47 @@ describe LogStash::Codecs::Multiline do
61
67
 
62
68
  lines = [ "120913 12:04:33 first line", "second line", "third line" ]
63
69
 
64
- events = []
65
- lines.each do |line|
66
- codec.decode(line) do |event|
67
- events << event
68
- end
69
- end
70
+ line_producer.call(lines)
70
71
  codec.flush { |e| events << e }
71
72
 
72
73
  insist { events.size } == 1
73
74
  insist { events.first["message"] } == lines.join("\n")
74
75
  end
75
76
 
76
-
77
77
  context "using default UTF-8 charset" do
78
78
 
79
79
  it "should decode valid UTF-8 input" do
80
- codec = LogStash::Codecs::Multiline.new("pattern" => "^\\s", "what" => "previous")
80
+ config.update("pattern" => "^\\s", "what" => "previous")
81
81
  lines = [ "foobar", "κόσμε" ]
82
- events = []
83
82
  lines.each do |line|
84
- insist { line.encoding.name } == "UTF-8"
85
- insist { line.valid_encoding? } == true
86
-
83
+ expect(line.encoding.name).to eq "UTF-8"
84
+ expect(line.valid_encoding?).to be_truthy
87
85
  codec.decode(line) { |event| events << event }
88
86
  end
89
87
  codec.flush { |e| events << e }
90
- insist { events.size } == 2
88
+ expect(events.size).to eq 2
91
89
 
92
90
  events.zip(lines).each do |tuple|
93
- insist { tuple[0]["message"] } == tuple[1]
94
- insist { tuple[0]["message"].encoding.name } == "UTF-8"
91
+ expect(tuple[0]["message"]).to eq tuple[1]
92
+ expect(tuple[0]["message"].encoding.name).to eq "UTF-8"
95
93
  end
96
94
  end
97
95
 
98
96
  it "should escape invalid sequences" do
99
- codec = LogStash::Codecs::Multiline.new("pattern" => "^\\s", "what" => "previous")
97
+ config.update("pattern" => "^\\s", "what" => "previous")
100
98
  lines = [ "foo \xED\xB9\x81\xC3", "bar \xAD" ]
101
- events = []
102
99
  lines.each do |line|
103
- insist { line.encoding.name } == "UTF-8"
104
- insist { line.valid_encoding? } == false
100
+ expect(line.encoding.name).to eq "UTF-8"
101
+ expect(line.valid_encoding?).to eq false
105
102
 
106
103
  codec.decode(line) { |event| events << event }
107
104
  end
108
105
  codec.flush { |e| events << e }
109
- insist { events.size } == 2
106
+ expect(events.size).to eq 2
110
107
 
111
108
  events.zip(lines).each do |tuple|
112
- insist { tuple[0]["message"] } == tuple[1].inspect[1..-2]
113
- insist { tuple[0]["message"].encoding.name } == "UTF-8"
109
+ expect(tuple[0]["message"]).to eq tuple[1].inspect[1..-2]
110
+ expect(tuple[0]["message"].encoding.name).to eq "UTF-8"
114
111
  end
115
112
  end
116
113
  end
@@ -119,27 +116,26 @@ describe LogStash::Codecs::Multiline do
119
116
  context "with valid non UTF-8 source encoding" do
120
117
 
121
118
  it "should encode to UTF-8" do
122
- codec = LogStash::Codecs::Multiline.new("charset" => "ISO-8859-1", "pattern" => "^\\s", "what" => "previous")
119
+ config.update("charset" => "ISO-8859-1", "pattern" => "^\\s", "what" => "previous")
123
120
  samples = [
124
121
  ["foobar", "foobar"],
125
122
  ["\xE0 Montr\xE9al", "à Montréal"],
126
123
  ]
127
124
 
128
125
  # lines = [ "foo \xED\xB9\x81\xC3", "bar \xAD" ]
129
- events = []
130
126
  samples.map{|(a, b)| a.force_encoding("ISO-8859-1")}.each do |line|
131
- insist { line.encoding.name } == "ISO-8859-1"
132
- insist { line.valid_encoding? } == true
127
+ expect(line.encoding.name).to eq "ISO-8859-1"
128
+ expect(line.valid_encoding?).to eq true
133
129
 
134
130
  codec.decode(line) { |event| events << event }
135
131
  end
136
132
  codec.flush { |e| events << e }
137
- insist { events.size } == 2
133
+ expect(events.size).to eq 2
138
134
 
139
135
  events.zip(samples.map{|(a, b)| b}).each do |tuple|
140
- insist { tuple[1].encoding.name } == "UTF-8"
141
- insist { tuple[0]["message"] } == tuple[1]
142
- insist { tuple[0]["message"].encoding.name } == "UTF-8"
136
+ expect(tuple[1].encoding.name).to eq "UTF-8"
137
+ expect(tuple[0]["message"]).to eq tuple[1]
138
+ expect(tuple[0]["message"].encoding.name).to eq "UTF-8"
143
139
  end
144
140
  end
145
141
  end
@@ -147,25 +143,25 @@ describe LogStash::Codecs::Multiline do
147
143
  context "with invalid non UTF-8 source encoding" do
148
144
 
149
145
  it "should encode to UTF-8" do
150
- codec = LogStash::Codecs::Multiline.new("charset" => "ASCII-8BIT", "pattern" => "^\\s", "what" => "previous")
146
+ config.update("charset" => "ASCII-8BIT", "pattern" => "^\\s", "what" => "previous")
151
147
  samples = [
152
148
  ["\xE0 Montr\xE9al", "� Montr�al"],
153
149
  ["\xCE\xBA\xCF\x8C\xCF\x83\xCE\xBC\xCE\xB5", "����������"],
154
150
  ]
155
151
  events = []
156
152
  samples.map{|(a, b)| a.force_encoding("ASCII-8BIT")}.each do |line|
157
- insist { line.encoding.name } == "ASCII-8BIT"
158
- insist { line.valid_encoding? } == true
153
+ expect(line.encoding.name).to eq "ASCII-8BIT"
154
+ expect(line.valid_encoding?).to eq true
159
155
 
160
156
  codec.decode(line) { |event| events << event }
161
157
  end
162
158
  codec.flush { |e| events << e }
163
- insist { events.size } == 2
159
+ expect(events.size).to eq 2
164
160
 
165
161
  events.zip(samples.map{|(a, b)| b}).each do |tuple|
166
- insist { tuple[1].encoding.name } == "UTF-8"
167
- insist { tuple[0]["message"] } == tuple[1]
168
- insist { tuple[0]["message"].encoding.name } == "UTF-8"
162
+ expect(tuple[1].encoding.name).to eq "UTF-8"
163
+ expect(tuple[0]["message"]).to eq tuple[1]
164
+ expect(tuple[0]["message"].encoding.name).to eq "UTF-8"
169
165
  end
170
166
  end
171
167
 
@@ -183,7 +179,7 @@ describe LogStash::Codecs::Multiline do
183
179
  let(:options) {
184
180
  {
185
181
  "pattern" => "^-",
186
- "what" => "previous",
182
+ "what" => "previous",
187
183
  "max_lines" => max_lines,
188
184
  "max_bytes" => "2 mb"
189
185
  }
@@ -203,7 +199,7 @@ describe LogStash::Codecs::Multiline do
203
199
  let(:options) {
204
200
  {
205
201
  "pattern" => "^-",
206
- "what" => "previous",
202
+ "what" => "previous",
207
203
  "max_lines" => 20000,
208
204
  "max_bytes" => max_bytes
209
205
  }
@@ -218,4 +214,114 @@ describe LogStash::Codecs::Multiline do
218
214
  end
219
215
  end
220
216
  end
217
+
218
+ describe "auto flushing" do
219
+ let(:config) { {} }
220
+ let(:codec) { MultilineRspec.new(config).tap {|c| c.register} }
221
+ let(:events) { [] }
222
+ let(:lines) do
223
+ { "en.log" => ["hello world", " second line", " third line"],
224
+ "fr.log" => ["Salut le Monde", " deuxième ligne", " troisième ligne"],
225
+ "de.log" => ["Hallo Welt"] }
226
+ end
227
+ let(:listener_class) { LineListener }
228
+ let(:auto_flush_interval) { 0.5 }
229
+
230
+ let(:line_producer) do
231
+ lambda do |path|
232
+ #create a listener that holds upstream state
233
+ listener = listener_class.new(events, codec, path)
234
+ lines[path].each do |data|
235
+ listener.accept(data)
236
+ end
237
+ end
238
+ end
239
+
240
+ context "when auto_flush_interval is not set" do
241
+ it "does not build any events" do
242
+ config.update("pattern" => "^\\s", "what" => "previous")
243
+ line_producer.call("en.log")
244
+ sleep auto_flush_interval + 0.1
245
+ expect(events.size).to eq(0)
246
+ expect(codec.buffer_size).to eq(3)
247
+ end
248
+ end
249
+
250
+ context "when the auto_flush raises an exception" do
251
+ let(:errmsg) { "OMG, Daleks!" }
252
+ let(:listener_class) { LineErrorListener }
253
+
254
+ it "does not build any events, logs an error and the buffer data remains" do
255
+ config.update("pattern" => "^\\s", "what" => "previous",
256
+ "auto_flush_interval" => auto_flush_interval)
257
+ codec.logger = MultilineLogTracer.new
258
+ line_producer.call("en.log")
259
+ sleep(auto_flush_interval + 0.1)
260
+ msg, args = codec.logger.trace_for(:error)
261
+ expect(msg).to eq("Multiline: flush downstream error")
262
+ expect(args[:exception].message).to eq(errmsg)
263
+ expect(events.size).to eq(0)
264
+ expect(codec.buffer_size).to eq(3)
265
+ end
266
+ end
267
+
268
+ def assert_produced_events(key, sleeping)
269
+ line_producer.call(key)
270
+ sleep(sleeping)
271
+ yield
272
+ expect(codec).to have_an_empty_buffer
273
+ end
274
+
275
+ context "mode: previous, when there are pauses between multiline file writes" do
276
+ it "auto-flushes events from the accumulated lines to the queue" do
277
+ config.update("pattern" => "^\\s", "what" => "previous",
278
+ "auto_flush_interval" => auto_flush_interval)
279
+
280
+ assert_produced_events("en.log", auto_flush_interval + 0.1) do
281
+ expect(events[0]).to match_path_and_line("en.log", lines["en.log"])
282
+ end
283
+
284
+ line_producer.call("fr.log")
285
+ #next line(s) come before auto-flush, i.e. assert it's buffered
286
+ sleep(auto_flush_interval - 0.3)
287
+ expect(codec.buffer_size).to eq(3)
288
+ expect(events.size).to eq(1)
289
+
290
+ assert_produced_events("de.log", auto_flush_interval + 0.1) do
291
+ # now the events are generated
292
+ expect(events[1]).to match_path_and_line("fr.log", lines["fr.log"])
293
+ expect(events[2]).to match_path_and_line("de.log", lines["de.log"])
294
+ end
295
+ end
296
+ end
297
+
298
+ context "mode: next, when there are pauses between multiline file writes" do
299
+
300
+ let(:lines) do
301
+ { "en.log" => ["hello world++", "second line++", "third line"],
302
+ "fr.log" => ["Salut le Monde++", "deuxième ligne++", "troisième ligne"],
303
+ "de.log" => ["Hallo Welt"] }
304
+ end
305
+
306
+ it "auto-flushes events from the accumulated lines to the queue" do
307
+ config.update("pattern" => "\\+\\+$", "what" => "next",
308
+ "auto_flush_interval" => auto_flush_interval)
309
+
310
+ assert_produced_events("en.log", auto_flush_interval + 0.1) do
311
+ # wait for auto_flush
312
+ expect(events[0]).to match_path_and_line("en.log", lines["en.log"])
313
+ end
314
+
315
+ assert_produced_events("de.log", auto_flush_interval - 0.3) do
316
+ #this file is read before auto-flush
317
+ expect(events[1]).to match_path_and_line("de.log", lines["de.log"])
318
+ end
319
+
320
+ assert_produced_events("fr.log", auto_flush_interval + 0.1) do
321
+ # wait for auto_flush
322
+ expect(events[2]).to match_path_and_line("fr.log", lines["fr.log"])
323
+ end
324
+ end
325
+ end
326
+ end
221
327
  end
@@ -1,3 +1,5 @@
1
+
2
+
1
3
  def decode_events
2
4
  multiline = LogStash::Codecs::Multiline.new(options)
3
5
 
@@ -10,3 +12,112 @@ def decode_events
10
12
  multiline.flush { |event| events << event }
11
13
  events
12
14
  end
15
+
16
+ class LineListener
17
+ attr_reader :data, :path, :queue, :codec
18
+ # use attr_reader to define noop methods of Listener API
19
+ attr_reader :deleted, :created, :error, :eof #, :line
20
+
21
+ def initialize(queue, codec, path = '')
22
+ # store state from upstream
23
+ @queue = queue
24
+ @codec = codec
25
+ @path = path
26
+ end
27
+
28
+ # receives a line from some upstream source
29
+ # and sends it downstream
30
+ def accept(data)
31
+ @codec.accept dup_adding_state(data)
32
+ end
33
+
34
+ def process_event(event)
35
+ event["path"] = path
36
+ @queue << event
37
+ end
38
+
39
+ def add_state(data)
40
+ @data = data
41
+ self
42
+ end
43
+
44
+ private
45
+
46
+ # dup and add state for downstream
47
+ def dup_adding_state(line)
48
+ self.class.new(queue, codec, path).add_state(line)
49
+ end
50
+ end
51
+
52
+ class LineErrorListener < LineListener
53
+ def process_event(event)
54
+ raise StandardError.new("OMG, Daleks!")
55
+ end
56
+ end
57
+
58
+ class MultilineRspec < LogStash::Codecs::Multiline
59
+ def internal_buffer
60
+ @buffer
61
+ end
62
+ def buffer_size
63
+ @buffer.size
64
+ end
65
+ end
66
+
67
+ class TracerBase
68
+ def initialize() @tracer = []; end
69
+
70
+ def trace_for(symbol)
71
+ params = @tracer.assoc(symbol)
72
+ params.nil? ? false : params.last
73
+ end
74
+
75
+ def clear()
76
+ @tracer.clear()
77
+ end
78
+ end
79
+
80
+ class MultilineLogTracer < TracerBase
81
+ def warn(*args) @tracer.push [:warn, args]; end
82
+ def error(*args) @tracer.push [:error, args]; end
83
+ def debug(*args) @tracer.push [:debug, args]; end
84
+ def info(*args) @tracer.push [:info, args]; end
85
+
86
+ def info?() true; end
87
+ def debug?() true; end
88
+ def warn?() true; end
89
+ def error?() true; end
90
+ end
91
+
92
+ class AutoFlushTracer < TracerBase
93
+ def auto_flush() @tracer.push [:auto_flush, true]; end
94
+ end
95
+
96
+ class IdentityMapCodecTracer < TracerBase
97
+ def clone() self.class.new; end
98
+ def decode(data) @tracer.push [:decode, data]; end
99
+ def encode(event) @tracer.push [:encode, event]; end
100
+ def flush(&block) @tracer.push [:flush, block.call]; end
101
+ def close() @tracer.push [:close, true]; end
102
+ def logger() @logger ||= MultilineLogTracer.new; end
103
+ end
104
+
105
+ RSpec::Matchers.define(:have_an_empty_buffer) do
106
+ match do |actual|
107
+ actual.buffer_size.zero?
108
+ end
109
+
110
+ failure_message do
111
+ "Expecting #{actual.buffer_size} to be 0"
112
+ end
113
+ end
114
+
115
+ RSpec::Matchers.define(:match_path_and_line) do |path, line|
116
+ match do |actual|
117
+ actual["path"] == path && actual["message"] == line.join($/)
118
+ end
119
+
120
+ failure_message do
121
+ "Expecting #{actual['path']} to equal `#{path}` and #{actual["message"]} to equal #{line.join($/)}"
122
+ end
123
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-codec-multiline
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-19 00:00:00.000000000 Z
11
+ date: 2015-12-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: logstash-core
@@ -84,9 +84,11 @@ files:
84
84
  - LICENSE
85
85
  - NOTICE.TXT
86
86
  - README.md
87
+ - lib/logstash/codecs/auto_flush.rb
87
88
  - lib/logstash/codecs/identity_map_codec.rb
88
89
  - lib/logstash/codecs/multiline.rb
89
90
  - logstash-codec-multiline.gemspec
91
+ - spec/codecs/auto_flush_spec.rb
90
92
  - spec/codecs/identity_map_codec_spec.rb
91
93
  - spec/codecs/multiline_spec.rb
92
94
  - spec/supports/helpers.rb
@@ -117,6 +119,7 @@ signing_key:
117
119
  specification_version: 4
118
120
  summary: The multiline codec will collapse multiline messages and merge them into a single event.
119
121
  test_files:
122
+ - spec/codecs/auto_flush_spec.rb
120
123
  - spec/codecs/identity_map_codec_spec.rb
121
124
  - spec/codecs/multiline_spec.rb
122
125
  - spec/supports/helpers.rb