logstash-codec-cef 5.0.2-java → 5.0.3-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6a264129640c226f88fc36cc63ebdd666f37990c8fba3ff5f97e97beb4a7c674
4
- data.tar.gz: c34c004be92402ab0d8b768e3a4e41c07c6e48efc8a61f1b8a49ed073f38e7f9
3
+ metadata.gz: 7c1e2d59b4849c66f6d60d93c0fe03f11e330c97bedfe25280919f3651b5508c
4
+ data.tar.gz: 4b44ff90abb4bbb14e3a5268df6a841e9354f49ab8fef1c3dfd8ffb6798cde85
5
5
  SHA512:
6
- metadata.gz: 511c08bfa584988789a5623c91be14f8c9244989dbd84b8eb5bda4043a91862dca29887aaf47991a8857214f59b9c8ba614c22ceaa95375c191a919493404380
7
- data.tar.gz: 8000dffe4b20cd9f7e698009337634694a64a3b7674ecbc638c633e64f8f62db80bc7d2b8b6062bc54e1c563ac05305cb52c1703f605864b2831f590b5311eb9
6
+ metadata.gz: 68f97c0e0361d3b889c62f8502fb2802d24770266e0dc306ee5d327c6b3e9e3405aaf9db9c53e033b46b052bec82b3f8ec9d2df63c99869d5d8e87e1523e1f89
7
+ data.tar.gz: e2335c058a3d7fbbfa57e57eeb008903b4423063094d543948161864438e8fd65ea09df2a275853991c0ab15122680f8fa4cccb49504ff43fdc9693658d0db75
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 5.0.3
2
+ - Fix handling of higher-plane UTF-8 characters in message body
3
+
1
4
  ## 5.0.2
2
5
  - Update gemspec summary
3
6
 
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2012–2016 Elasticsearch <http://www.elastic.co>
1
+ Copyright (c) 2012-2018 Elasticsearch <http://www.elastic.co>
2
2
 
3
3
  Licensed under the Apache License, Version 2.0 (the "License");
4
4
  you may not use this file except in compliance with the License.
@@ -1,5 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require "logstash/util/buftok"
3
+ require "logstash/util/charset"
3
4
  require "logstash/codecs/base"
4
5
  require "json"
5
6
 
@@ -80,6 +81,12 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
80
81
  public
81
82
  def initialize(params={})
82
83
  super(params)
84
+
85
+ # CEF input MUST be UTF-8, per the CEF White Paper that serves as the format's specification:
86
+ # https://web.archive.org/web/20160422182529/https://kc.mcafee.com/resources/sites/MCAFEE/content/live/CORP_KNOWLEDGEBASE/78000/KB78712/en_US/CEF_White_Paper_20100722.pdf
87
+ @utf8_charset = LogStash::Util::Charset.new('UTF-8')
88
+ @utf8_charset.logger = self.logger
89
+
83
90
  if @delimiter
84
91
  # Logstash configuration doesn't have built-in support for escaping,
85
92
  # so we implement it here. Feature discussion for escaping is here:
@@ -110,6 +117,12 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
110
117
  event = LogStash::Event.new
111
118
  event.set(raw_data_field, data) unless raw_data_field.nil?
112
119
 
120
+ @utf8_charset.convert(data)
121
+
122
+ # Several of the many operations in the rest of this method will fail when they encounter UTF-8-tagged strings
123
+ # that contain invalid byte sequences; fail early to avoid wasted work.
124
+ fail('invalid byte sequence in UTF-8') unless data.valid_encoding?
125
+
113
126
  # Strip any quotations at the start and end, flex connectors seem to send this
114
127
  if data[0] == "\""
115
128
  data = data[1..-2]
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-codec-cef'
4
- s.version = '5.0.2'
4
+ s.version = '5.0.3'
5
5
  s.platform = 'java'
6
6
  s.licenses = ['Apache License (2.0)']
7
7
  s.summary = "Reads the ArcSight Common Event Format (CEF)."
@@ -509,11 +509,44 @@ describe LogStash::Codecs::CEF do
509
509
  end
510
510
  end
511
511
 
512
+ context 'with UTF-8 message' do
513
+ let(:message) { 'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=192.168.1.11 target=aaaaaああああaaaa msg=Description Omitted' }
514
+
515
+ # since this spec is encoded UTF-8, the literal strings it contains are encoded with UTF-8,
516
+ # but codecs in Logstash tend to receive their input as BINARY (or: ASCII-8BIT); ensure that
517
+ # we can handle either without losing the UTF-8 characters from the higher planes.
518
+ %w(
519
+ BINARY
520
+ UTF-8
521
+ ).each do |external_encoding|
522
+ context "externally encoded as #{external_encoding}" do
523
+ let(:message) { super().force_encoding(external_encoding) }
524
+ it 'should keep the higher-plane characters' do
525
+ subject.decode(message.dup) do |event|
526
+ validate(event)
527
+ insist { event.get("target") } == "aaaaaああああaaaa"
528
+ insist { event.get("target").encoding } == Encoding::UTF_8
529
+ end
530
+ end
531
+ end
532
+ end
533
+ end
534
+
535
+ context 'non-UTF-8 message' do
536
+ let(:message) { 'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=192.168.1.11 target=aaaaaああああaaaa msg=Description Omitted'.encode('SHIFT_JIS') }
537
+ it 'should emit message unparsed with _cefparsefailure tag' do
538
+ subject.decode(message.dup) do |event|
539
+ insist { event.get("message").bytes.to_a } == message.bytes.to_a
540
+ insist { event.get("tags") } == ['_cefparsefailure']
541
+ end
542
+ end
543
+ end
544
+
512
545
  context "with raw_data_field set" do
513
546
  subject(:codec) { LogStash::Codecs::CEF.new("raw_data_field" => "message_raw") }
514
547
 
515
548
  it "should return the raw message in field message_raw" do
516
- subject.decode(message) do |e|
549
+ subject.decode(message.dup) do |e|
517
550
  validate(e)
518
551
  insist { e.get("message_raw") } == message
519
552
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-codec-cef
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.2
4
+ version: 5.0.3
5
5
  platform: java
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-11-07 00:00:00.000000000 Z
11
+ date: 2018-06-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -84,7 +84,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
84
  version: '0'
85
85
  requirements: []
86
86
  rubyforge_project:
87
- rubygems_version: 2.6.11
87
+ rubygems_version: 2.6.13
88
88
  signing_key:
89
89
  specification_version: 4
90
90
  summary: Reads the ArcSight Common Event Format (CEF).