logstash-codec-cef 5.0.2-java → 5.0.3-java

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6a264129640c226f88fc36cc63ebdd666f37990c8fba3ff5f97e97beb4a7c674
4
- data.tar.gz: c34c004be92402ab0d8b768e3a4e41c07c6e48efc8a61f1b8a49ed073f38e7f9
3
+ metadata.gz: 7c1e2d59b4849c66f6d60d93c0fe03f11e330c97bedfe25280919f3651b5508c
4
+ data.tar.gz: 4b44ff90abb4bbb14e3a5268df6a841e9354f49ab8fef1c3dfd8ffb6798cde85
5
5
  SHA512:
6
- metadata.gz: 511c08bfa584988789a5623c91be14f8c9244989dbd84b8eb5bda4043a91862dca29887aaf47991a8857214f59b9c8ba614c22ceaa95375c191a919493404380
7
- data.tar.gz: 8000dffe4b20cd9f7e698009337634694a64a3b7674ecbc638c633e64f8f62db80bc7d2b8b6062bc54e1c563ac05305cb52c1703f605864b2831f590b5311eb9
6
+ metadata.gz: 68f97c0e0361d3b889c62f8502fb2802d24770266e0dc306ee5d327c6b3e9e3405aaf9db9c53e033b46b052bec82b3f8ec9d2df63c99869d5d8e87e1523e1f89
7
+ data.tar.gz: e2335c058a3d7fbbfa57e57eeb008903b4423063094d543948161864438e8fd65ea09df2a275853991c0ab15122680f8fa4cccb49504ff43fdc9693658d0db75
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 5.0.3
2
+ - Fix handling of higher-plane UTF-8 characters in message body
3
+
1
4
  ## 5.0.2
2
5
  - Update gemspec summary
3
6
 
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2012–2016 Elasticsearch <http://www.elastic.co>
1
+ Copyright (c) 2012-2018 Elasticsearch <http://www.elastic.co>
2
2
 
3
3
  Licensed under the Apache License, Version 2.0 (the "License");
4
4
  you may not use this file except in compliance with the License.
@@ -1,5 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require "logstash/util/buftok"
3
+ require "logstash/util/charset"
3
4
  require "logstash/codecs/base"
4
5
  require "json"
5
6
 
@@ -80,6 +81,12 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
80
81
  public
81
82
  def initialize(params={})
82
83
  super(params)
84
+
85
+ # CEF input MUST be UTF-8, per the CEF White Paper that serves as the format's specification:
86
+ # https://web.archive.org/web/20160422182529/https://kc.mcafee.com/resources/sites/MCAFEE/content/live/CORP_KNOWLEDGEBASE/78000/KB78712/en_US/CEF_White_Paper_20100722.pdf
87
+ @utf8_charset = LogStash::Util::Charset.new('UTF-8')
88
+ @utf8_charset.logger = self.logger
89
+
83
90
  if @delimiter
84
91
  # Logstash configuration doesn't have built-in support for escaping,
85
92
  # so we implement it here. Feature discussion for escaping is here:
@@ -110,6 +117,12 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
110
117
  event = LogStash::Event.new
111
118
  event.set(raw_data_field, data) unless raw_data_field.nil?
112
119
 
120
+ @utf8_charset.convert(data)
121
+
122
+ # Several of the many operations in the rest of this method will fail when they encounter UTF8-tagged strings
123
+ # that contain invalid byte sequences; fail early to avoid wasted work.
124
+ fail('invalid byte sequence in UTF-8') unless data.valid_encoding?
125
+
113
126
  # Strip any quotations at the start and end, flex connectors seem to send this
114
127
  if data[0] == "\""
115
128
  data = data[1..-2]
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-codec-cef'
4
- s.version = '5.0.2'
4
+ s.version = '5.0.3'
5
5
  s.platform = 'java'
6
6
  s.licenses = ['Apache License (2.0)']
7
7
  s.summary = "Reads the ArcSight Common Event Format (CEF)."
@@ -509,11 +509,44 @@ describe LogStash::Codecs::CEF do
509
509
  end
510
510
  end
511
511
 
512
+ context 'with UTF-8 message' do
513
+ let(:message) { 'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=192.168.1.11 target=aaaaaああああaaaa msg=Description Omitted' }
514
+
515
+ # since this spec is encoded UTF-8, the literal strings it contains are encoded with UTF-8,
516
+ # but codecs in Logstash tend to receive their input as BINARY (or: ASCII-8BIT); ensure that
517
+ # we can handle either without losing the UTF-8 characters from the higher planes.
518
+ %w(
519
+ BINARY
520
+ UTF-8
521
+ ).each do |external_encoding|
522
+ context "externally encoded as #{external_encoding}" do
523
+ let(:message) { super().force_encoding(external_encoding) }
524
+ it 'should keep the higher-plane characters' do
525
+ subject.decode(message.dup) do |event|
526
+ validate(event)
527
+ insist { event.get("target") } == "aaaaaああああaaaa"
528
+ insist { event.get("target").encoding } == Encoding::UTF_8
529
+ end
530
+ end
531
+ end
532
+ end
533
+ end
534
+
535
+ context 'non-UTF-8 message' do
536
+ let(:message) { 'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=192.168.1.11 target=aaaaaああああaaaa msg=Description Omitted'.encode('SHIFT_JIS') }
537
+ it 'should emit message unparsed with _cefparsefailure tag' do
538
+ subject.decode(message.dup) do |event|
539
+ insist { event.get("message").bytes.to_a } == message.bytes.to_a
540
+ insist { event.get("tags") } == ['_cefparsefailure']
541
+ end
542
+ end
543
+ end
544
+
512
545
  context "with raw_data_field set" do
513
546
  subject(:codec) { LogStash::Codecs::CEF.new("raw_data_field" => "message_raw") }
514
547
 
515
548
  it "should return the raw message in field message_raw" do
516
- subject.decode(message) do |e|
549
+ subject.decode(message.dup) do |e|
517
550
  validate(e)
518
551
  insist { e.get("message_raw") } == message
519
552
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-codec-cef
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.2
4
+ version: 5.0.3
5
5
  platform: java
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-07 00:00:00.000000000 Z
11
+ date: 2018-06-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -84,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
84
  version: '0'
85
85
  requirements: []
86
86
  rubyforge_project:
87
- rubygems_version: 2.6.11
87
+ rubygems_version: 2.6.13
88
88
  signing_key:
89
89
  specification_version: 4
90
90
  summary: Reads the ArcSight Common Event Format (CEF).