logstash-codec-cef 6.2.4-java → 6.2.6-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b204281f8d8ab5b22fc8f75231d3a31dd2ab4c2254c7bd4dca981bc996f5f38d
4
- data.tar.gz: 8e255e40a7967fcd0326bbbd2db40511faaf55ce55222790feaa1b19b20fe3af
3
+ metadata.gz: 344660a8caa1f5fbdde48422db80b287e6afff7e8c1d3ebdeb5f70269431a514
4
+ data.tar.gz: 9f061964eae0cdcd46fcefe9b5feefc05c72074935c44d83a8f35c5e54564a6c
5
5
  SHA512:
6
- metadata.gz: 3be6e9d4a944e9eecf8d75dd8e4880c32f12c89eca25b17d1ba33bc33ed95179d34ff8af373f8d3fbd3d9a5c81d64fb4317f4955b0fd92d81aa752473ff94f0e
7
- data.tar.gz: a24d876f0aeeafeb1d24f2be62dca556f433bf6945ff88cf5d44d1cf97270127429ec68ed9c964b579a39d167d9e2558ccd3fdf567a8e8104a8bb9dec1db30cf
6
+ metadata.gz: 791c750b7085fbefec2e71537d4452174e851bfa28a7d6f046f67d07a543148ebceb51acbc8356b42c90fb2f0fcac28c29ce5f95624ac598070f5389e3f95f72
7
+ data.tar.gz: c995d8153001929fad98c0dab84664f6af1c6c7b4b873f1be2277d5b0f64e45531178c73fc8fa339ee7c4644c118cf2cfce37c812a4ea6cd6f9d2221d543a470
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 6.2.6
2
+ - Fix: when decoding, escaped newlines and carriage returns in extension values are now correctly decoded into literal newlines and carriage returns respectively [#98](https://github.com/logstash-plugins/logstash-codec-cef/pull/98)
3
+ - Fix: when decoding, non-CEF payloads are identified and intercepted to prevent data-loss and corruption. They now cause a descriptive log message to be emitted, and are emitted as their own `_cefparsefailure`-tagged event containing the original bytes in its `message` field [#99](https://github.com/logstash-plugins/logstash-codec-cef/issues/99)
4
+ - Fix: when decoding while configured with a `delimiter`, flushing this codec now correctly consumes the remainder of its internal buffer. This resolves an issue where bytes that are written without a trailing delimiter could be lost [#100](https://github.com/logstash-plugins/logstash-codec-cef/issues/100)
5
+
6
+ ## 6.2.5
7
+ - [DOC] Update link to CEF implementation guide [#97](https://github.com/logstash-plugins/logstash-codec-cef/pull/97)
8
+
1
9
  ## 6.2.4
2
10
  - [DOC] Emphasize importance of delimiter setting for byte stream inputs [#95](https://github.com/logstash-plugins/logstash-codec-cef/pull/95)
3
11
 
data/docs/index.asciidoc CHANGED
@@ -20,12 +20,11 @@ include::{include_path}/plugin_header.asciidoc[]
20
20
 
21
21
  ==== Description
22
22
 
23
- Implementation of a Logstash codec for the ArcSight Common Event Format (CEF)
24
- Based on Revision 20 of Implementing ArcSight CEF, dated from June 05, 2013
25
- https://community.saas.hpe.com/dcvta86296/attachments/dcvta86296/connector-documentation/1116/1/CommonEventFormatv23.pdf
23
+ Implementation of a Logstash codec for the ArcSight Common Event Format (CEF).
24
+ It is based on https://www.microfocus.com/documentation/arcsight/arcsight-smartconnectors/pdfdoc/common-event-format-v25/common-event-format-v25.pdf[Implementing ArcSight CEF Revision 25, September 2017].
26
25
 
27
- If this codec receives a payload from an input that is not a valid CEF message, then it will
28
- produce an event with the payload as the 'message' field and a '_cefparsefailure' tag.
26
+ If this codec receives a payload from an input that is not a valid CEF message, then it
27
+ produces an event with the payload as the 'message' field and a '_cefparsefailure' tag.
29
28
 
30
29
  ==== Compatibility with the Elastic Common Schema (ECS)
31
30
 
@@ -84,9 +84,6 @@ class LogStash::Codecs::CEF::TimestampNormalizer
84
84
 
85
85
  # Ruby's `Time::at(sec, microseconds_with_frac)`
86
86
  Time.at(parsed_time.get_epoch_second, Rational(parsed_time.get_nano, 1000))
87
- rescue => e
88
- $stderr.puts "parse_cef_format_sgring(#{value.inspect}, #{context_timezone.inspect}) #!=> #{e.message}"
89
- raise
90
87
  end
91
88
 
92
89
  def resolve_assuming_year(parsed_temporal_accessor)
@@ -102,15 +102,12 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
102
102
  # - non-pipe characters
103
103
  HEADER_PATTERN = /(?:\\\||\\\\|[^|])*?/
104
104
 
105
- # Cache of a scanner pattern that _captures_ a HEADER followed by an unescaped pipe
106
- HEADER_SCANNER = /(#{HEADER_PATTERN})#{Regexp.quote('|')}/
105
+ # Cache of a scanner pattern that _captures_ the next HEADER field, which must be terminated by an unescaped pipe
106
+ HEADER_NEXT_FIELD_PATTERN = /(#{HEADER_PATTERN})#{Regexp.quote('|')}/
107
107
 
108
108
  # Cache of a gsub pattern that matches a backslash-escaped backslash or backslash-escaped pipe, _capturing_ the escaped character
109
109
  HEADER_ESCAPE_CAPTURE = /\\([\\|])/
110
110
 
111
- # Cache of a gsub pattern that matches a backslash-escaped backslash or backslash-escaped equals, _capturing_ the escaped character
112
- EXTENSION_VALUE_ESCAPE_CAPTURE = /\\([\\=])/
113
-
114
111
  # While the original CEF spec calls out that extension keys must be alphanumeric and must not contain spaces,
115
112
  # in practice many "CEF" producers like the Arcsight smart connector produce non-legal keys including underscores,
116
113
  # commas, periods, and square-bracketed index offsets.
@@ -139,8 +136,8 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
139
136
  # - runs of whitespace that are NOT followed by something that looks like a key-equals sequence
140
137
  EXTENSION_VALUE_PATTERN = /(?:\S|\s++(?!#{EXTENSION_KEY_PATTERN}=))*/
141
138
 
142
- # Cache of a scanner pattern that _captures_ extension field key/value pairs
143
- EXTENSION_KEY_VALUE_SCANNER = /(#{EXTENSION_KEY_PATTERN})=(#{EXTENSION_VALUE_PATTERN})\s*/
139
+ # Cache of a pattern that _captures_ the NEXT extension field key/value pair
140
+ EXTENSION_NEXT_KEY_VALUE_PATTERN = /^(#{EXTENSION_KEY_PATTERN})=(#{EXTENSION_VALUE_PATTERN})\s*/
144
141
 
145
142
  ##
146
143
  # @see CEF#sanitize_header_field
@@ -160,10 +157,30 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
160
157
  "=" => "\\=",
161
158
  "\n" => "\\n",
162
159
  "\r" => "\\n",
163
- }
160
+ }.freeze
164
161
  EXTENSION_VALUE_SANITIZER_PATTERN = Regexp.union(EXTENSION_VALUE_SANITIZER_MAPPING.keys)
165
162
  private_constant :EXTENSION_VALUE_SANITIZER_MAPPING, :EXTENSION_VALUE_SANITIZER_PATTERN
166
163
 
164
+
165
+ LITERAL_BACKSLASH = "\\".freeze
166
+ private_constant :LITERAL_BACKSLASH
167
+ LITERAL_NEWLINE = "\n".freeze
168
+ private_constant :LITERAL_NEWLINE
169
+ LITERAL_CARRIAGE_RETURN = "\r".freeze
170
+ private_constant :LITERAL_CARRIAGE_RETURN
171
+
172
+ ##
173
+ # @see CEF#desanitize_extension_val
174
+ EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING = {
175
+ LITERAL_BACKSLASH+LITERAL_BACKSLASH => LITERAL_BACKSLASH,
176
+ LITERAL_BACKSLASH+'=' => '=',
177
+ LITERAL_BACKSLASH+'n' => LITERAL_NEWLINE,
178
+ LITERAL_BACKSLASH+'r' => LITERAL_CARRIAGE_RETURN,
179
+ }.freeze
180
+ EXTENSION_VALUE_SANITIZER_REVERSE_PATTERN = Regexp.union(EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING.keys)
181
+ private_constant :EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING, :EXTENSION_VALUE_SANITIZER_REVERSE_PATTERN
182
+
183
+
167
184
  CEF_PREFIX = 'CEF:'.freeze
168
185
 
169
186
  public
@@ -193,14 +210,24 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
193
210
  public
194
211
  def decode(data, &block)
195
212
  if @delimiter
213
+ @logger.trace("Buffering #{data.bytesize}B of data") if @logger.trace?
196
214
  @buffer.extract(data).each do |line|
215
+ @logger.trace("Decoding #{line.bytesize + @delimiter.bytesize}B of buffered data") if @logger.trace?
197
216
  handle(line, &block)
198
217
  end
199
218
  else
219
+ @logger.trace("Decoding #{data.bytesize}B of unbuffered data") if @logger.trace?
200
220
  handle(data, &block)
201
221
  end
202
222
  end
203
223
 
224
+ def flush(&block)
225
+ if @delimiter && (remainder = @buffer.flush)
226
+ @logger.trace("Flushing #{remainder.bytesize}B of buffered data") if @logger.trace?
227
+ handle(remainder, &block) unless remainder.empty?
228
+ end
229
+ end
230
+
204
231
  def handle(data, &block)
205
232
  original_data = data.dup
206
233
  event = event_factory.new_event
@@ -218,10 +245,16 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
218
245
  end
219
246
 
220
247
  # Use a scanning parser to capture the HEADER_FIELDS
221
- unprocessed_data = data
222
- @header_fields.each do |field_name|
223
- match_data = HEADER_SCANNER.match(unprocessed_data)
224
- break if match_data.nil? # missing fields
248
+ unprocessed_data = data.chomp
249
+ if unprocessed_data.include?(LITERAL_NEWLINE)
250
+ fail("message is not valid CEF because it contains unescaped newline characters; " +
251
+ "use the `delimiter` setting to enable in-codec buffering and delimiter-splitting")
252
+ end
253
+ @header_fields.each_with_index do |field_name, idx|
254
+ match_data = HEADER_NEXT_FIELD_PATTERN.match(unprocessed_data)
255
+ if match_data.nil?
256
+ fail("message is not valid CEF; found #{idx} of 7 required pipe-terminated header fields")
257
+ end
225
258
 
226
259
  escaped_field_value = match_data[1]
227
260
  next if escaped_field_value.nil?
@@ -248,11 +281,14 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
248
281
  event.set(cef_version_field, delete_cef_prefix(event.get(cef_version_field)))
249
282
 
250
283
  # Use a scanning parser to capture the Extension Key/Value Pairs
251
- if message && message.include?('=')
284
+ if message && !message.empty?
252
285
  message = message.strip
253
286
  extension_fields = {}
254
287
 
255
- message.scan(EXTENSION_KEY_VALUE_SCANNER) do |extension_field_key, raw_extension_field_value|
288
+ while (match = message.match(EXTENSION_NEXT_KEY_VALUE_PATTERN))
289
+ extension_field_key, raw_extension_field_value = match.captures
290
+ message = match.post_match
291
+
256
292
  # expand abbreviated extension field keys
257
293
  extension_field_key = @decode_mapping.fetch(extension_field_key, extension_field_key)
258
294
 
@@ -260,10 +296,13 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
260
296
  extension_field_key = extension_field_key.sub(EXTENSION_KEY_ARRAY_CAPTURE, '[\1]\2') if extension_field_key.end_with?(']')
261
297
 
262
298
  # process legal extension field value escapes
263
- extension_field_value = raw_extension_field_value.gsub(EXTENSION_VALUE_ESCAPE_CAPTURE, '\1')
299
+ extension_field_value = desanitize_extension_val(raw_extension_field_value)
264
300
 
265
301
  extension_fields[extension_field_key] = extension_field_value
266
302
  end
303
+ if !message.empty?
304
+ fail("invalid extensions; keyless value present `#{message}`")
305
+ end
267
306
 
268
307
  # in ECS mode, normalize timestamps including timezone.
269
308
  if ecs_compatibility != :disabled
@@ -283,7 +322,7 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
283
322
  yield event
284
323
  rescue => e
285
324
  @logger.error("Failed to decode CEF payload. Generating failure event with payload in message field.",
286
- :exception => e.class, :message => e.message, :backtrace => e.backtrace, :original_data => original_data)
325
+ log_metadata(:original_data => original_data))
287
326
  yield event_factory.new_event("message" => data, "tags" => ["_cefparsefailure"])
288
327
  end
289
328
 
@@ -332,6 +371,19 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
332
371
  @syslog_header = ecs_select[disabled:'syslog',v1:'[log][syslog][header]']
333
372
  end
334
373
 
374
+ ##
375
+ # produces log metadata, injecting the current exception and log-level-relevant backtraces
376
+ # @param context [Hash{Symbol=>Object}]: the base context
377
+ def log_metadata(context={})
378
+ return context unless $!
379
+
380
+ exception_context = {}
381
+ exception_context[:exception] = "#{$!.class}: #{$!.message}"
382
+ exception_context[:backtrace] = $!.backtrace if @logger.debug?
383
+
384
+ exception_context.merge(context)
385
+ end
386
+
335
387
  class CEFField
336
388
  ##
337
389
  # @param name [String]: the full CEF name of a field
@@ -547,6 +599,10 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
547
599
  .gsub(EXTENSION_VALUE_SANITIZER_PATTERN, EXTENSION_VALUE_SANITIZER_MAPPING)
548
600
  end
549
601
 
602
+ def desanitize_extension_val(value)
603
+ value.to_s.gsub(EXTENSION_VALUE_SANITIZER_REVERSE_PATTERN, EXTENSION_VALUE_SANITIZER_REVERSE_MAPPING)
604
+ end
605
+
550
606
  def normalize_timestamp(value, device_timezone_name)
551
607
  value = @timestamp_normalzer.normalize(value, device_timezone_name).iso8601(9)
552
608
 
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-codec-cef'
4
- s.version = '6.2.4'
4
+ s.version = '6.2.6'
5
5
  s.platform = 'java'
6
6
  s.licenses = ['Apache License (2.0)']
7
7
  s.summary = "Reads the ArcSight Common Event Format (CEF)."
@@ -159,7 +159,6 @@ describe LogStash::Codecs::CEF::TimestampNormalizer do
159
159
  context 'and handling a yearless date string from mid january' do
160
160
  let(:time_to_parse) { Time.parse("2021-01-17T00:00:08.123456789Z") }
161
161
  it 'assumes that the date being parsed is in the distant past' do
162
- $stderr.puts(parsable_string)
163
162
  expect(parsed_result.month).to eq(1)
164
163
  expect(parsed_result.year).to eq(time_of_parse.year)
165
164
  end
@@ -409,14 +409,16 @@ describe LogStash::Codecs::CEF do
409
409
  # @yieldparam event [Event]
410
410
  # @yieldreturn [void]
411
411
  # @return [Event]
412
- def decode_one(codec, data)
413
- events = do_decode(codec, data)
412
+ def decode_one(codec, data, flush: true, &block)
413
+ events = do_decode(codec, data, flush: flush)
414
414
  fail("Expected one event, got #{events.size} events: #{events.inspect}") unless events.size == 1
415
415
  event = events.first
416
416
 
417
- if block_given?
418
- aggregate_failures('decode one') do
419
- yield event
417
+ if block
418
+ enriched_event_validation(event) do |e|
419
+ aggregate_failures('decode one') do
420
+ yield e
421
+ end
420
422
  end
421
423
  end
422
424
 
@@ -434,16 +436,35 @@ describe LogStash::Codecs::CEF do
434
436
  # @yieldparam event [Event]
435
437
  # @yieldreturn [void]
436
438
  # @return [Array<Event>]
437
- def do_decode(codec, data)
439
+ def do_decode(codec, data, flush: true, &block)
438
440
  events = []
439
441
  codec.decode(data) do |event|
440
442
  events << event
441
443
  end
444
+ flush && codec.flush do |event|
445
+ events << event
446
+ end
442
447
 
443
- events.each { |event| yield event } if block_given?
448
+ if block
449
+ events.each do |event|
450
+ enriched_event_validation(event, &block)
451
+ end
452
+ end
444
453
 
445
454
  events
446
455
  end
456
+
457
+ ##
458
+ # Enrich event validation by outputting the serialized event to stderr
459
+ # if-and-only-if the provided block's rspec expectations are not met.
460
+ #
461
+ # @param event [#to_hash_with_metadata]
462
+ def enriched_event_validation(event)
463
+ yield(event)
464
+ rescue RSpec::Expectations::ExpectationNotMetError
465
+ $stderr.puts("\e[35m#{event.to_hash_with_metadata}\e[0m\n")
466
+ raise
467
+ end
447
468
  end
448
469
 
449
470
  context "#decode", :ecs_compatibility_support do
@@ -452,7 +473,7 @@ describe LogStash::Codecs::CEF do
452
473
  allow_any_instance_of(described_class).to receive(:ecs_compatibility).and_return(ecs_compatibility)
453
474
  end
454
475
 
455
- let (:message) { "CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232" }
476
+ let(:message) { "CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232" }
456
477
 
457
478
  include DecodeHelpers
458
479
 
@@ -465,17 +486,126 @@ describe LogStash::Codecs::CEF do
465
486
  # Related: https://github.com/elastic/logstash/issues/1645
466
487
  subject(:codec) { LogStash::Codecs::CEF.new("delimiter" => '\r\n') }
467
488
 
489
+ let(:message_two) { "CEF:0|fun|whimsy|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232" }
490
+
491
+ # testing the implicit flush that occurs when the delimiter is encountered
468
492
  it "should parse on the delimiter " do
469
- do_decode(subject,message) do |e|
493
+ do_decode(subject, message, flush: false) do |e|
470
494
  raise Exception.new("Should not get here. If we do, it means the decoder emitted an event before the delimiter was seen?")
471
495
  end
472
496
 
473
- decode_one(subject, "\r\n") do |e|
497
+ # the delimiter's presence flushes what we already received, but not the new bytes we send
498
+ decode_one(subject, "\r\n#{message_two}", flush: false) do |e|
499
+ validate(e)
500
+ insist { e.get(ecs_select[disabled: "deviceVendor", v1:"[observer][vendor]"]) } == "security"
501
+ insist { e.get(ecs_select[disabled: "deviceProduct", v1:"[observer][product]"]) } == "threatmanager"
502
+ end
503
+
504
+ # allowing a flush emits the buffered event with our new bits appended
505
+ decode_one(subject, " split=perfect", flush: true) do |e|
506
+ validate(e)
507
+ insist { e.get(ecs_select[disabled: "deviceVendor", v1:"[observer][vendor]"]) } == "fun"
508
+ insist { e.get(ecs_select[disabled: "deviceProduct", v1:"[observer][product]"]) } == "whimsy"
509
+ insist { e.get("split") } == "perfect"
510
+ end
511
+ end
512
+
513
+ it 'flushes on close' do
514
+ # message does NOT have delimiter, but we still get our event
515
+ decode_one(subject, message, flush: true) do |e|
474
516
  validate(e)
475
517
  insist { e.get(ecs_select[disabled: "deviceVendor", v1:"[observer][vendor]"]) } == "security"
476
518
  insist { e.get(ecs_select[disabled: "deviceProduct", v1:"[observer][product]"]) } == "threatmanager"
477
519
  end
478
520
  end
521
+
522
+ it 'emits multiple from a single decode operation' do
523
+ events = do_decode(subject, "#{message}\r\n#{message_two}")
524
+ expect(events.size).to eq(2)
525
+
526
+ enriched_event_validation(events[0]) do |event|
527
+ validate(event)
528
+ insist { event.get(ecs_select[disabled: "deviceVendor", v1:"[observer][vendor]"]) } == "security"
529
+ insist { event.get(ecs_select[disabled: "deviceProduct", v1:"[observer][product]"]) } == "threatmanager"
530
+ end
531
+
532
+ enriched_event_validation(events[1]) do |event|
533
+ validate(event)
534
+ insist { event.get(ecs_select[disabled: "deviceVendor", v1:"[observer][vendor]"]) } == "fun"
535
+ insist { event.get(ecs_select[disabled: "deviceProduct", v1:"[observer][product]"]) } == "whimsy"
536
+ end
537
+ end
538
+ end
539
+
540
+ # CEF requires seven pipe-terminated headers before optional extensions
541
+ context 'with a non-CEF payload' do
542
+ let(:logger_stub) { double('Logger').as_null_object }
543
+ before(:each) do
544
+ allow_any_instance_of(described_class).to receive(:logger).and_return(logger_stub)
545
+ end
546
+
547
+ context 'containing 0 header-like sections' do
548
+ let(:message) { 'this is not cef' }
549
+ it 'logs helpfully and produces a tagged event' do
550
+ do_decode(subject,message) do |event|
551
+ expect(event.get('tags')).to include('_cefparsefailure')
552
+ expect(event.get('message')).to eq(message)
553
+ end
554
+ expect(logger_stub).to have_received(:error)
555
+ .with(a_string_including('Failed to decode CEF payload. Generating failure event with payload in message field'),
556
+ a_hash_including(exception: a_string_including("found 0 of 7 required pipe-terminated header fields"),
557
+ original_data: message))
558
+ end
559
+ end
560
+ context 'containing 4 header-like sections' do
561
+ let(:message) { "a|b|c with several \\| escaped\\| pipes|d|bananas" }
562
+ it 'logs helpfully and produces a tagged event' do
563
+ do_decode(subject,message) do |event|
564
+ expect(event.get('tags')).to include('_cefparsefailure')
565
+ expect(event.get('message')).to eq(message)
566
+ end
567
+ expect(logger_stub).to have_received(:error)
568
+ .with(a_string_including('Failed to decode CEF payload. Generating failure event with payload in message field'),
569
+ a_hash_including(exception: a_string_including("found 4 of 7 required pipe-terminated header fields"),
570
+ original_data: message))
571
+ end
572
+ end
573
+ context 'containing non-key/value extensions' do
574
+ let (:message) { "CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|this is in the extensions space but it is not valid because it is not equals-separated key/value" }
575
+ it 'logs helpfully and produces a tagged event' do
576
+ do_decode(subject,message) do |event|
577
+ expect(event.get('tags')).to include('_cefparsefailure')
578
+ expect(event.get('message')).to eq(message)
579
+ end
580
+ expect(logger_stub).to have_received(:error)
581
+ .with(a_string_including('Failed to decode CEF payload. Generating failure event with payload in message field'),
582
+ a_hash_including(exception: a_string_including("invalid extensions; keyless value present"),
583
+ original_data: message))
584
+ end
585
+ end
586
+ context 'containing unescaped newlines' do
587
+ # when not using a `delimiter`, we expect exactly one CEF log per call to decode.
588
+ let (:message) {
589
+ <<~EOMESSAGE
590
+ CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.67
591
+ CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.67
592
+ CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.67
593
+ CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.67
594
+ CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.67
595
+ EOMESSAGE
596
+ }
597
+ it 'logs helpfully and produces a tagged event' do
598
+ do_decode(subject, message) do |event|
599
+ expect(event.get('tags')).to include('_cefparsefailure')
600
+ expect(event.get('message')).to eq(message)
601
+ end
602
+ expect(logger_stub).to have_received(:error)
603
+ .with(a_string_including('Failed to decode CEF payload. Generating failure event with payload in message field'),
604
+ a_hash_including(exception: a_string_including("message is not valid CEF because it contains unescaped newline characters",
605
+ "use the `delimiter` setting to enable in-codec buffering and delimiter-splitting"),
606
+ original_data: message))
607
+ end
608
+ end
479
609
  end
480
610
 
481
611
  context 'when a CEF header ends with a pair of properly-escaped backslashes' do
@@ -548,6 +678,23 @@ describe LogStash::Codecs::CEF do
548
678
  end
549
679
  end
550
680
 
681
+ let(:literal_newline) { "\n" }
682
+ let(:literal_carriage_return) { "\r" }
683
+ let(:literal_equals) { "=" }
684
+ let(:literal_backslash) { "\\" }
685
+ let(:escaped_newline) { literal_backslash + 'n' }
686
+ let(:escaped_carriage_return) { literal_backslash + 'r' }
687
+ let(:escaped_equals) { literal_backslash + literal_equals }
688
+ let(:escaped_backslash) { literal_backslash + literal_backslash }
689
+ let(:escaped_sequences_in_extension_value) { "CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|foo=bar msg=this message has escaped equals #{escaped_equals} and escaped newlines #{escaped_newline} escaped carriage returns #{escaped_carriage_return} and escaped backslashes #{escaped_backslash} in it bar=baz" }
690
+ it "decodes embedded newlines, carriage regurns, backslashes, and equals signs" do
691
+ decode_one(subject, escaped_sequences_in_extension_value) do |e|
692
+ insist { e.get("foo") } == 'bar'
693
+ insist { e.get("message") } == "this message has escaped equals #{literal_equals} and escaped newlines #{literal_newline} escaped carriage returns #{literal_carriage_return} and escaped backslashes #{literal_backslash} in it"
694
+ insist { e.get("bar") } == 'baz'
695
+ end
696
+ end
697
+
551
698
  context "zoneless deviceReceiptTime(rt) when deviceTimeZone(dtz) is provided" do
552
699
  let(:cef_formatted_timestamp) { 'Jul 19 2017 10:50:21.127' }
553
700
  let(:zone_name) { 'Europe/Moscow' }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-codec-cef
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.2.4
4
+ version: 6.2.6
5
5
  platform: java
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-03-15 00:00:00.000000000 Z
11
+ date: 2022-10-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement