logstash-codec-cef 5.0.3-java → 5.0.4-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7c1e2d59b4849c66f6d60d93c0fe03f11e330c97bedfe25280919f3651b5508c
4
- data.tar.gz: 4b44ff90abb4bbb14e3a5268df6a841e9354f49ab8fef1c3dfd8ffb6798cde85
3
+ metadata.gz: 2d6aa2e3f0deee7e7dc16646e1803e2514f2e52e5396329c5ca8fbc6a9a11890
4
+ data.tar.gz: fdca34d3a6ce64552a5965a60543c97bd23629ae521f04d9d2757c3f7f5d746a
5
5
  SHA512:
6
- metadata.gz: 68f97c0e0361d3b889c62f8502fb2802d24770266e0dc306ee5d327c6b3e9e3405aaf9db9c53e033b46b052bec82b3f8ec9d2df63c99869d5d8e87e1523e1f89
7
- data.tar.gz: e2335c058a3d7fbbfa57e57eeb008903b4423063094d543948161864438e8fd65ea09df2a275853991c0ab15122680f8fa4cccb49504ff43fdc9693658d0db75
6
+ metadata.gz: b7bbb1fe5a6c5915e6c613e689a2c47ce7795c6b807c564b14b271b14adfe7b8c2f46626421acb6cc9cb45c08c7772801f9ad9bc9432bebe2eb8e2fa8e9f88b1
7
+ data.tar.gz: d2a29e95aaa41635b6240219714da61daff58b75e37aff39e25365120ef66c066f3936d63e6cf2c028eb53e94056d42bbf2b5dc464e7502a314eb4878970349c
@@ -1,3 +1,8 @@
1
+ ## 5.0.4
2
+ - Fix bug in parsing headers where certain legal escape sequences could cause non-escaped pipe characters to be ignored.
3
+ - Fix bug in parsing extension values where a legal unescaped space in a field's value could be interpreted as a field separator (#54)
4
+ - Add explicit handling for extension key names that use array-like syntax that isn't legal with the strict-mode field-reference parser (e.g., `fieldname[0]` becomes `[fieldname][0]`).
5
+
1
6
  ## 5.0.3
2
7
  - Fix handling of higher-plane UTF-8 characters in message body
3
8
 
@@ -74,9 +74,132 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
74
74
  HEADER_FIELDS = ['cefVersion','deviceVendor','deviceProduct','deviceVersion','deviceEventClassId','name','severity']
75
75
 
76
76
  # Translating and flattening the CEF extensions with known field names as documented in the Common Event Format whitepaper
77
- MAPPINGS = { "act" => "deviceAction", "app" => "applicationProtocol", "c6a1" => "deviceCustomIPv6Address1", "c6a1Label" => "deviceCustomIPv6Address1Label", "c6a2" => "deviceCustomIPv6Address2", "c6a2Label" => "deviceCustomIPv6Address2Label", "c6a3" => "deviceCustomIPv6Address3", "c6a3Label" => "deviceCustomIPv6Address3Label", "c6a4" => "deviceCustomIPv6Address4", "c6a4Label" => "deviceCustomIPv6Address4Label", "cat" => "deviceEventCategory", "cfp1" => "deviceCustomFloatingPoint1", "cfp1Label" => "deviceCustomFloatingPoint1Label", "cfp2" => "deviceCustomFloatingPoint2", "cfp2Label" => "deviceCustomFloatingPoint2Label", "cfp3" => "deviceCustomFloatingPoint3", "cfp3Label" => "deviceCustomFloatingPoint3Label", "cfp4" => "deviceCustomFloatingPoint4", "cfp4Label" => "deviceCustomFloatingPoint4Label", "cn1" => "deviceCustomNumber1", "cn1Label" => "deviceCustomNumber1Label", "cn2" => "deviceCustomNumber2", "cn2Label" => "deviceCustomNumber2Label", "cn3" => "deviceCustomNumber3", "cn3Label" => "deviceCustomNumber3Label", "cnt" => "baseEventCount", "cs1" => "deviceCustomString1", "cs1Label" => "deviceCustomString1Label", "cs2" => "deviceCustomString2", "cs2Label" => "deviceCustomString2Label", "cs3" => "deviceCustomString3", "cs3Label" => "deviceCustomString3Label", "cs4" => "deviceCustomString4", "cs4Label" => "deviceCustomString4Label", "cs5" => "deviceCustomString5", "cs5Label" => "deviceCustomString5Label", "cs6" => "deviceCustomString6", "cs6Label" => "deviceCustomString6Label", "dhost" => "destinationHostName", "dmac" => "destinationMacAddress", "dntdom" => "destinationNtDomain", "dpid" => "destinationProcessId", "dpriv" => "destinationUserPrivileges", "dproc" => "destinationProcessName", "dpt" => "destinationPort", "dst" => "destinationAddress", "duid" => "destinationUserId", "duser" => "destinationUserName", "dvc" => "deviceAddress", "dvchost" => "deviceHostName", "dvcpid" => "deviceProcessId", "end" => "endTime", "fname" => "fileName", "fsize" => "fileSize", "in" 
=> "bytesIn", "msg" => "message", "out" => "bytesOut", "outcome" => "eventOutcome", "proto" => "transportProtocol", "request" => "requestUrl", "rt" => "deviceReceiptTime", "shost" => "sourceHostName", "smac" => "sourceMacAddress", "sntdom" => "sourceNtDomain", "spid" => "sourceProcessId", "spriv" => "sourceUserPrivileges", "sproc" => "sourceProcessName", "spt" => "sourcePort", "src" => "sourceAddress", "start" => "startTime", "suid" => "sourceUserId", "suser" => "sourceUserName", "ahost" => "agentHost", "art" => "agentReceiptTime", "at" => "agentType", "aid" => "agentId", "_cefVer" => "cefVersion", "agt" => "agentAddress", "av" => "agentVersion", "atz" => "agentTimeZone", "dtz" => "destinationTimeZone", "slong" => "sourceLongitude", "slat" => "sourceLatitude", "dlong" => "destinationLongitude", "dlat" => "destinationLatitude", "catdt" => "categoryDeviceType", "mrt" => "managerReceiptTime", "amac" => "agentMacAddress" }
78
-
79
- DEPRECATED_HEADER_FIELDS = ['cef_version','cef_vendor','cef_product','cef_device_version','cef_sigid','cef_name','cef_severity']
77
+ MAPPINGS = {
78
+ "act" => "deviceAction",
79
+ "app" => "applicationProtocol",
80
+ "c6a1" => "deviceCustomIPv6Address1",
81
+ "c6a1Label" => "deviceCustomIPv6Address1Label",
82
+ "c6a2" => "deviceCustomIPv6Address2",
83
+ "c6a2Label" => "deviceCustomIPv6Address2Label",
84
+ "c6a3" => "deviceCustomIPv6Address3",
85
+ "c6a3Label" => "deviceCustomIPv6Address3Label",
86
+ "c6a4" => "deviceCustomIPv6Address4",
87
+ "c6a4Label" => "deviceCustomIPv6Address4Label",
88
+ "cat" => "deviceEventCategory",
89
+ "cfp1" => "deviceCustomFloatingPoint1",
90
+ "cfp1Label" => "deviceCustomFloatingPoint1Label",
91
+ "cfp2" => "deviceCustomFloatingPoint2",
92
+ "cfp2Label" => "deviceCustomFloatingPoint2Label",
93
+ "cfp3" => "deviceCustomFloatingPoint3",
94
+ "cfp3Label" => "deviceCustomFloatingPoint3Label",
95
+ "cfp4" => "deviceCustomFloatingPoint4",
96
+ "cfp4Label" => "deviceCustomFloatingPoint4Label",
97
+ "cn1" => "deviceCustomNumber1",
98
+ "cn1Label" => "deviceCustomNumber1Label",
99
+ "cn2" => "deviceCustomNumber2",
100
+ "cn2Label" => "deviceCustomNumber2Label",
101
+ "cn3" => "deviceCustomNumber3",
102
+ "cn3Label" => "deviceCustomNumber3Label",
103
+ "cnt" => "baseEventCount",
104
+ "cs1" => "deviceCustomString1",
105
+ "cs1Label" => "deviceCustomString1Label",
106
+ "cs2" => "deviceCustomString2",
107
+ "cs2Label" => "deviceCustomString2Label",
108
+ "cs3" => "deviceCustomString3",
109
+ "cs3Label" => "deviceCustomString3Label",
110
+ "cs4" => "deviceCustomString4",
111
+ "cs4Label" => "deviceCustomString4Label",
112
+ "cs5" => "deviceCustomString5",
113
+ "cs5Label" => "deviceCustomString5Label",
114
+ "cs6" => "deviceCustomString6",
115
+ "cs6Label" => "deviceCustomString6Label",
116
+ "dhost" => "destinationHostName",
117
+ "dmac" => "destinationMacAddress",
118
+ "dntdom" => "destinationNtDomain",
119
+ "dpid" => "destinationProcessId",
120
+ "dpriv" => "destinationUserPrivileges",
121
+ "dproc" => "destinationProcessName",
122
+ "dpt" => "destinationPort",
123
+ "dst" => "destinationAddress",
124
+ "duid" => "destinationUserId",
125
+ "duser" => "destinationUserName",
126
+ "dvc" => "deviceAddress",
127
+ "dvchost" => "deviceHostName",
128
+ "dvcpid" => "deviceProcessId",
129
+ "end" => "endTime",
130
+ "fname" => "fileName",
131
+ "fsize" => "fileSize",
132
+ "in" => "bytesIn",
133
+ "msg" => "message",
134
+ "out" => "bytesOut",
135
+ "outcome" => "eventOutcome",
136
+ "proto" => "transportProtocol",
137
+ "request" => "requestUrl",
138
+ "rt" => "deviceReceiptTime",
139
+ "shost" => "sourceHostName",
140
+ "smac" => "sourceMacAddress",
141
+ "sntdom" => "sourceNtDomain",
142
+ "spid" => "sourceProcessId",
143
+ "spriv" => "sourceUserPrivileges",
144
+ "sproc" => "sourceProcessName",
145
+ "spt" => "sourcePort",
146
+ "src" => "sourceAddress",
147
+ "start" => "startTime",
148
+ "suid" => "sourceUserId",
149
+ "suser" => "sourceUserName",
150
+ "ahost" => "agentHost",
151
+ "art" => "agentReceiptTime",
152
+ "at" => "agentType",
153
+ "aid" => "agentId",
154
+ "_cefVer" => "cefVersion",
155
+ "agt" => "agentAddress",
156
+ "av" => "agentVersion",
157
+ "atz" => "agentTimeZone",
158
+ "dtz" => "destinationTimeZone",
159
+ "slong" => "sourceLongitude",
160
+ "slat" => "sourceLatitude",
161
+ "dlong" => "destinationLongitude",
162
+ "dlat" => "destinationLatitude",
163
+ "catdt" => "categoryDeviceType",
164
+ "mrt" => "managerReceiptTime",
165
+ "amac" => "agentMacAddress"
166
+ }
167
+
168
+ # A CEF header field is a sequence of zero or more:
169
+ # - backslash-escaped pipes; OR
170
+ # - backslash-escaped backslashes; OR
171
+ # - non-pipe characters
172
+ HEADER_PATTERN = /(?:\\\||\\\\|[^|])*?/
173
+
174
+ # Cache of a scanner pattern that _captures_ a HEADER followed by an unescaped pipe
175
+ HEADER_SCANNER = /(#{HEADER_PATTERN})#{Regexp.quote('|')}/
176
+
177
+ # Cache of a gsub pattern that matches a backslash-escaped backslash or backslash-escaped pipe, _capturing_ the escaped character
178
+ HEADER_ESCAPE_CAPTURE = /\\([\\|])/
179
+
180
+ # Cache of a gsub pattern that matches a backslash-escaped backslash or backslash-escaped equals, _capturing_ the escaped character
181
+ EXTENSION_VALUE_ESCAPE_CAPTURE = /\\([\\=])/
182
+
183
+ # While the original CEF spec calls out that extension keys must be alphanumeric and not contain spaces,
184
+ # in practice many "CEF" producers like the ArcSight SmartConnector produce non-legal keys including underscores,
185
+ # commas, periods, and square-bracketed index offsets.
186
+ # Allow any sequence of characters that are _not_ backslashes, equals, or spaces.
187
+ EXTENSION_KEY_PATTERN = /[^= \\]+/
188
+
189
+ # Some CEF extension keys seen in the wild use an undocumented array-like syntax that may not be compatible with
190
+ # the Event API's strict-mode FieldReference parser (e.g., `fieldname[0]`).
191
+ # Cache of a `String#sub` pattern matching array-like syntax and capturing both the base field name and the
192
+ # array-indexing portion so we can convert to a valid FieldReference (e.g., `[fieldname][0]`).
193
+ EXTENSION_KEY_ARRAY_CAPTURE = /^([^\[\]]+)((?:\[[0-9]+\])+)$/ # '[\1]\2'
194
+
195
+ # In extensions, spaces may be included in an extension value without any escaping,
196
+ # so an extension value is a sequence of zero or more:
197
+ # - non-whitespace characters; OR
198
+ # - runs of whitespace that are NOT followed by something that looks like a key-equals sequence
199
+ EXTENSION_VALUE_PATTERN = /(?:\S|\s++(?!#{EXTENSION_KEY_PATTERN}=))*/
200
+
201
+ # Cache of a scanner pattern that _captures_ extension field key/value pairs
202
+ EXTENSION_KEY_VALUE_SCANNER = /(#{EXTENSION_KEY_PATTERN})=(#{EXTENSION_VALUE_PATTERN})\s*/
80
203
 
81
204
  public
82
205
  def initialize(params={})
@@ -96,12 +219,6 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
96
219
  end
97
220
  end
98
221
 
99
- private
100
- def store_header_field(event,field_name,field_data)
101
- #Unescape pipes and backslash in header fields
102
- event.set(field_name,field_data.gsub(/\\\|/, '|').gsub(/\\\\/, '\\')) unless field_data.nil?
103
- end
104
-
105
222
  public
106
223
  def decode(data, &block)
107
224
  if @delimiter
@@ -128,22 +245,24 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
128
245
  data = data[1..-2]
129
246
  end
130
247
 
131
- # Split by the pipes, pipes in the extension part are perfectly valid and do not need escaping
132
- # The better solution for the splitting regex would be /(?<!\\(\\\\)*)[\|]/, but this
133
- # gives an "SyntaxError: (RegexpError) invalid pattern in look-behind" for the variable length look behind.
134
- # Therefore one edge case is not handled properly: \\| (this should split, but it does not, because the escaped \ is not recognized)
135
- # TODO: To solve all unescaping cases, regex is not suitable. A little parse should be written.
136
- split_data = data.split /(?<=[^\\]\\\\)[\|]|(?<!\\)[\|]/
248
+ # Use a scanning parser to capture the HEADER_FIELDS
249
+ unprocessed_data = data
250
+ HEADER_FIELDS.each do |field_name|
251
+ match_data = HEADER_SCANNER.match(unprocessed_data)
252
+ break if match_data.nil? # missing fields
137
253
 
138
- # To be invoked when config settings is set to TRUE for V1 field names (cef_ext.<fieldname>) the following code might be removed in upcoming Codec revision
254
+ escaped_field_value = match_data[1]
255
+ next if escaped_field_value.nil?
139
256
 
140
- # To be invoked with default config settings to utilise the new field name formatting and flatten out the JSON document
141
- # Store header fields
142
- HEADER_FIELDS.each_with_index do |field_name, index|
143
- store_header_field(event,field_name,split_data[index])
257
+ # process legal header escape sequences
258
+ unescaped_field_value = escaped_field_value.gsub(HEADER_ESCAPE_CAPTURE, '\1')
259
+
260
+ event.set(field_name, unescaped_field_value)
261
+ unprocessed_data = match_data.post_match
144
262
  end
263
+
145
264
  #Remainder is message
146
- message = split_data[HEADER_FIELDS.size..-1].join('|')
265
+ message = unprocessed_data
147
266
 
148
267
  # Try and parse out the syslog header if there is one
149
268
  if event.get('cefVersion').include? ' '
@@ -155,36 +274,21 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
155
274
  # Get rid of the CEF bit in the version
156
275
  event.set('cefVersion', event.get('cefVersion').sub(/^CEF:/, ''))
157
276
 
158
- # Strip any whitespace from the message
159
- if not message.nil? and message.include? '='
277
+ # Use a scanning parser to capture the Extension Key/Value Pairs
278
+ if message && message.include?('=')
160
279
  message = message.strip
161
280
 
162
- # If the last KVP has no value, add an empty string, this prevents hash errors below
163
- if message.end_with?('=')
164
- message = message + ' ' unless message.end_with?('\=')
165
- end
281
+ message.scan(EXTENSION_KEY_VALUE_SCANNER) do |extension_field_key, raw_extension_field_value|
282
+ # expand abbreviated extension field keys
283
+ extension_field_key = MAPPINGS.fetch(extension_field_key, extension_field_key)
166
284
 
167
- # Insert custom delimiter to separate key-value pairs, to which some values will contain special characters
168
- # This separator '|^^^' os tested to be unique
169
- message = message.gsub((/(?:(\s+(\w+\=)))/),'|^^^\2')
170
-
171
- # Appropriately tokenizing the additional fields when ArcSight connectors are sending events using "COMPLETE" mode processing.
172
- # If these fields are NOT needed, then set the ArcSight processing mode for this destination to "FASTER" or "FASTEST"
173
- # Refer to ArcSight's SmartConnector user configuration guide
174
- message = message.gsub((/(\s+(\w+\.[^\s]\w+[^\|\s\.\=]+\=))/),'|^^^\2')
175
- message = message.split('|^^^')
176
-
177
- # Replaces the '=' with '***' to avoid conflict with strings with HTML content namely key-value pairs where the values contain HTML strings
178
- # Example : requestUrl = http://<testdomain>:<port>?query=A
179
- for i in 0..message.length-1
180
- message[i] = message[i].sub(/\=/, "***")
181
- message[i] = message[i].gsub(/\\=/, '=').gsub(/\\\\/, '\\')
182
- end
285
+ # convert extension field name to strict legal field_reference, fixing field names with ambiguous array-like syntax
286
+ extension_field_key = extension_field_key.sub(EXTENSION_KEY_ARRAY_CAPTURE, '[\1]\2') if extension_field_key.end_with?(']')
287
+
288
+ # process legal extension field value escapes
289
+ extension_field_value = raw_extension_field_value.gsub(EXTENSION_VALUE_ESCAPE_CAPTURE, '\1')
183
290
 
184
- message = message.map {|s| k, v = s.split('***'); "#{MAPPINGS[k] || k }=#{v}"}
185
- message = message.each_with_object({}) do |k|
186
- key, value = k.split(/\s*=\s*/,2)
187
- event.set(key, value)
291
+ event.set(extension_field_key, extension_field_value)
188
292
  end
189
293
  end
190
294
 
@@ -303,44 +407,4 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
303
407
  rescue TypeError, ArgumentError
304
408
  false
305
409
  end
306
-
307
- def handle_v1_fields(event, split_data)
308
- # Store header fields
309
- DEPRECATED_HEADER_FIELDS.each_with_index do |field_name, index|
310
- store_header_field(event,field_name,split_data[index])
311
- end
312
- #Remainder is message
313
- message = split_data[DEPRECATED_HEADER_FIELDS.size..-1].join('|')
314
-
315
- # Try and parse out the syslog header if there is one
316
- if event.get('cef_version').include? ' '
317
- split_cef_version= event.get('cef_version').rpartition(' ')
318
- event.set('syslog', split_cef_version[0])
319
- event.set('cef_version',split_cef_version[2])
320
- end
321
-
322
- # Get rid of the CEF bit in the version
323
- event.set('cef_version', event.get('cef_version').sub(/^CEF:/, ''))
324
-
325
- # Strip any whitespace from the message
326
- if not message.nil? and message.include? '='
327
- message = message.strip
328
-
329
- # If the last KVP has no value, add an empty string, this prevents hash errors below
330
- if message.end_with?('=')
331
- message=message + ' ' unless message.end_with?('\=')
332
- end
333
-
334
- # Now parse the key value pairs into it
335
- extensions = {}
336
- message = message.split(/ ([\w\.]+)=/)
337
- key, value = message.shift.split('=', 2)
338
- extensions[key] = value.gsub(/\\=/, '=').gsub(/\\\\/, '\\')
339
- Hash[*message].each{ |k, v| extensions[k] = v }
340
- # And save the new has as the extensions
341
- event.set('cef_ext', extensions)
342
- end
343
-
344
- end
345
-
346
410
  end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-codec-cef'
4
- s.version = '5.0.3'
4
+ s.version = '5.0.4'
5
5
  s.platform = 'java'
6
6
  s.licenses = ['Apache License (2.0)']
7
7
  s.summary = "Reads the ArcSight Common Event Format (CEF)."
@@ -296,6 +296,49 @@ describe LogStash::Codecs::CEF do
296
296
  insist { e.get('severity') } == "10"
297
297
  end
298
298
 
299
+ ##
300
+ # Use the given codec to decode the given data, ensuring exactly one event is emitted.
301
+ #
302
+ # If a block is given, yield the resulting event to the block _outside_ of `LogStash::Codecs::CEF#decode(String)`
303
+ # so that expectation-failure exceptions raised by RSpec are not caught by the codec's exception handling.
304
+ #
305
+ # @param codec [#decode]
306
+ # @param data [String]
307
+ # @yieldparam event [Event]
308
+ # @yieldreturn [void]
309
+ # @return [Event]
310
+ def decode_one(codec, data)
311
+ events = do_decode(codec, data)
312
+ fail("Expected one event, got #{events.size} events: #{events.inspect}") unless events.size == 1
313
+ event = events.first
314
+
315
+ yield event if block_given?
316
+
317
+ event
318
+ end
319
+
320
+ ##
321
+ # Use the given codec to decode the given data, returning an Array of the resulting Events
322
+ #
323
+ # If a block is given, each event is yielded to the block _outside_ of `LogStash::Codecs::CEF#decode(String)`
324
+ # so that expectation-failure exceptions raised by RSpec are not caught by the codec's exception handling.
325
+ #
326
+ # @param codec [#decode]
327
+ # @param data [String]
328
+ # @yieldparam event [Event]
329
+ # @yieldreturn [void]
330
+ # @return [Array<Event>]
331
+ def do_decode(codec, data)
332
+ events = []
333
+ codec.decode(data) do |event|
334
+ events << event
335
+ end
336
+
337
+ events.each { |event| yield event } if block_given?
338
+
339
+ events
340
+ end
341
+
299
342
  context "with delimiter set" do
300
343
  # '\r\n' in single quotes to simulate the real input from a config
301
344
  # containing \r\n as 4-character sequence in the config:
@@ -306,24 +349,36 @@ describe LogStash::Codecs::CEF do
306
349
  subject(:codec) { LogStash::Codecs::CEF.new("delimiter" => '\r\n') }
307
350
 
308
351
  it "should parse on the delimiter " do
309
- subject.decode(message) do |e|
352
+ do_decode(subject,message) do |e|
310
353
  raise Exception.new("Should not get here. If we do, it means the decoder emitted an event before the delimiter was seen?")
311
354
  end
312
355
 
313
- event = false;
314
- subject.decode("\r\n") do |e|
356
+ decode_one(subject, "\r\n") do |e|
315
357
  validate(e)
316
358
  insist { e.get("deviceVendor") } == "security"
317
359
  insist { e.get("deviceProduct") } == "threatmanager"
318
- event = true
319
360
  end
361
+ end
362
+ end
363
+
364
+ context 'when a CEF header ends with a pair of properly-escaped backslashes' do
365
+ let(:backslash) { '\\' }
366
+ let(:pipe) { '|' }
367
+ let(:message) { "CEF:0|security|threatmanager|1.0|100|double backslash" +
368
+ backslash + backslash + # escaped backslash
369
+ backslash + backslash + # escaped backslash
370
+ "|10|src=10.0.0.192 dst=12.121.122.82 spt=1232" }
320
371
 
321
- expect(event).to be_truthy
372
+ it 'should include the backslashes unescaped' do
373
+ event = decode_one(subject, message)
374
+
375
+ expect(event.get('name')).to eq('double backslash' + backslash + backslash )
376
+ expect(event.get('severity')).to eq('10') # ensure we didn't consume the separator
322
377
  end
323
378
  end
324
379
 
325
380
  it "should parse the cef headers" do
326
- subject.decode(message) do |e|
381
+ decode_one(subject, message) do |e|
327
382
  validate(e)
328
383
  insist { e.get("deviceVendor") } == "security"
329
384
  insist { e.get("deviceProduct") } == "threatmanager"
@@ -331,7 +386,7 @@ describe LogStash::Codecs::CEF do
331
386
  end
332
387
 
333
388
  it "should parse the cef body" do
334
- subject.decode(message) do |e|
389
+ decode_one(subject, message) do |e|
335
390
  insist { e.get("sourceAddress")} == "10.0.0.192"
336
391
  insist { e.get("destinationAddress") } == "12.121.122.82"
337
392
  insist { e.get("sourcePort") } == "1232"
@@ -340,7 +395,7 @@ describe LogStash::Codecs::CEF do
340
395
 
341
396
  let (:missing_headers) { "CEF:0|||1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232" }
342
397
  it "should be OK with missing CEF headers (multiple pipes in sequence)" do
343
- subject.decode(missing_headers) do |e|
398
+ decode_one(subject, missing_headers) do |e|
344
399
  validate(e)
345
400
  insist { e.get("deviceVendor") } == ""
346
401
  insist { e.get("deviceProduct") } == ""
@@ -349,35 +404,50 @@ describe LogStash::Codecs::CEF do
349
404
 
350
405
  let (:leading_whitespace) { "CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10| src=10.0.0.192 dst=12.121.122.82 spt=1232" }
351
406
  it "should strip leading whitespace from the message" do
352
- subject.decode(leading_whitespace) do |e|
407
+ decode_one(subject, leading_whitespace) do |e|
353
408
  validate(e)
354
409
  end
355
410
  end
356
411
 
357
412
  let (:escaped_pipes) { 'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this\|has an escaped pipe' }
358
413
  it "should be OK with escaped pipes in the message" do
359
- subject.decode(escaped_pipes) do |e|
414
+ decode_one(subject, escaped_pipes) do |e|
360
415
  insist { e.get("moo") } == 'this\|has an escaped pipe'
361
416
  end
362
417
  end
363
418
 
364
419
  let (:pipes_in_message) {'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this|has an pipe'}
365
420
  it "should be OK with not escaped pipes in the message" do
366
- subject.decode(pipes_in_message) do |e|
421
+ decode_one(subject, pipes_in_message) do |e|
367
422
  insist { e.get("moo") } == 'this|has an pipe'
368
423
  end
369
424
  end
370
425
 
426
+ # while we may see these in practice, equals MUST be escaped in the extensions per the spec.
371
427
  let (:equal_in_message) {'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this =has = equals\='}
372
428
  it "should be OK with equal in the message" do
373
- subject.decode(equal_in_message) do |e|
429
+ decode_one(subject, equal_in_message) do |e|
374
430
  insist { e.get("moo") } == 'this =has = equals='
375
431
  end
376
432
  end
377
433
 
434
+ context('escaped-equals and unescaped-spaces in the extension values') do
435
+ let(:query_string) { 'key1=value1&key2=value3 aa.bc&key3=value4'}
436
+ let(:escaped_query_string) { query_string.gsub('=','\\=') }
437
+ let(:cef_message) { "CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|go=start now query_string=#{escaped_query_string} final=done" }
438
+
439
+ it 'captures the extension values correctly' do
440
+ event = decode_one(subject, cef_message)
441
+
442
+ expect(event.get('go')).to eq('start now')
443
+ expect(event.get('query_string')).to eq(query_string)
444
+ expect(event.get('final')).to eq('done')
445
+ end
446
+ end
447
+
378
448
  let (:escaped_backslash_in_header) {'CEF:0|secu\\\\rity|threat\\\\manager|1.\\\\0|10\\\\0|tro\\\\jan successfully stopped|\\\\10|'}
379
449
  it "should be OK with escaped backslash in the headers" do
380
- subject.decode(escaped_backslash_in_header) do |e|
450
+ decode_one(subject, escaped_backslash_in_header) do |e|
381
451
  insist { e.get("cefVersion") } == '0'
382
452
  insist { e.get("deviceVendor") } == 'secu\\rity'
383
453
  insist { e.get("deviceProduct") } == 'threat\\manager'
@@ -390,7 +460,7 @@ describe LogStash::Codecs::CEF do
390
460
 
391
461
  let (:escaped_backslash_in_header_edge_case) {'CEF:0|security\\\\\\||threatmanager\\\\|1.0|100|trojan successfully stopped|10|'}
392
462
  it "should be OK with escaped backslash in the headers (edge case: escaped slash in front of pipe)" do
393
- subject.decode(escaped_backslash_in_header_edge_case) do |e|
463
+ decode_one(subject, escaped_backslash_in_header_edge_case) do |e|
394
464
  validate(e)
395
465
  insist { e.get("deviceVendor") } == 'security\\|'
396
466
  insist { e.get("deviceProduct") } == 'threatmanager\\'
@@ -399,7 +469,7 @@ describe LogStash::Codecs::CEF do
399
469
 
400
470
  let (:escaped_pipes_in_header) {'CEF:0|secu\\|rity|threatmanager\\||1.\\|0|10\\|0|tro\\|jan successfully stopped|\\|10|'}
401
471
  it "should be OK with escaped pipes in the headers" do
402
- subject.decode(escaped_pipes_in_header) do |e|
472
+ decode_one(subject, escaped_pipes_in_header) do |e|
403
473
  insist { e.get("cefVersion") } == '0'
404
474
  insist { e.get("deviceVendor") } == 'secu|rity'
405
475
  insist { e.get("deviceProduct") } == 'threatmanager|'
@@ -412,14 +482,14 @@ describe LogStash::Codecs::CEF do
412
482
 
413
483
  let (:backslash_in_message) {'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|moo=this \\has \\ backslashs\\'}
414
484
  it "should be OK with backslashs in the message" do
415
- subject.decode(backslash_in_message) do |e|
485
+ decode_one(subject, backslash_in_message) do |e|
416
486
  insist { e.get("moo") } == 'this \\has \\ backslashs\\'
417
487
  end
418
488
  end
419
489
 
420
490
  let (:equal_in_header) {'CEF:0|security|threatmanager=equal|1.0|100|trojan successfully stopped|10|'}
421
491
  it "should be OK with equal in the headers" do
422
- subject.decode(equal_in_header) do |e|
492
+ decode_one(subject, equal_in_header) do |e|
423
493
  validate(e)
424
494
  insist { e.get("deviceProduct") } == "threatmanager=equal"
425
495
  end
@@ -427,7 +497,7 @@ describe LogStash::Codecs::CEF do
427
497
 
428
498
  let (:spaces_in_between_keys) {'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10| src=10.0.0.192 dst=12.121.122.82 spt=1232'}
429
499
  it "should be OK to have one or more spaces between keys" do
430
- subject.decode(spaces_in_between_keys) do |e|
500
+ decode_one(subject, spaces_in_between_keys) do |e|
431
501
  validate(e)
432
502
  insist { e.get("sourceAddress") } == "10.0.0.192"
433
503
  insist { e.get("destinationAddress") } == "12.121.122.82"
@@ -437,7 +507,7 @@ describe LogStash::Codecs::CEF do
437
507
 
438
508
  let (:allow_spaces_in_values) {'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232 dproc=InternetExplorer x.x.x.x'}
439
509
  it "should be OK to have one or more spaces in values" do
440
- subject.decode(allow_spaces_in_values) do |e|
510
+ decode_one(subject, allow_spaces_in_values) do |e|
441
511
  validate(e)
442
512
  insist { e.get("sourceAddress") } == "10.0.0.192"
443
513
  insist { e.get("destinationAddress") } == "12.121.122.82"
@@ -448,26 +518,26 @@ describe LogStash::Codecs::CEF do
448
518
 
449
519
  let (:preserve_additional_fields_with_dot_notations) {'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 additional.dotfieldName=new_value ad.Authentification=MICROSOFT_AUTHENTICATION_PACKAGE_V1_0 ad.Error_,Code=3221225578 dst=12.121.122.82 ad.field[0]=field0 ad.name[1]=new_name'}
450
520
  it "should keep ad.fields" do
451
- subject.decode(preserve_additional_fields_with_dot_notations) do |e|
521
+ decode_one(subject, preserve_additional_fields_with_dot_notations) do |e|
452
522
  validate(e)
453
523
  insist { e.get("sourceAddress") } == "10.0.0.192"
454
524
  insist { e.get("destinationAddress") } == "12.121.122.82"
455
- insist { e.get("ad.field[0]") } == "field0"
456
- insist { e.get("ad.name[1]") } == "new_name"
525
+ insist { e.get("[ad.field][0]") } == "field0"
526
+ insist { e.get("[ad.name][1]") } == "new_name"
457
527
  insist { e.get("ad.Authentification") } == "MICROSOFT_AUTHENTICATION_PACKAGE_V1_0"
458
- insist { e.get("ad.Error_,Code") } == "3221225578"
528
+ insist { e.get('ad.Error_,Code') } == "3221225578"
459
529
  insist { e.get("additional.dotfieldName") } == "new_value"
460
530
  end
461
531
  end
462
532
 
463
533
  let (:preserve_random_values_key_value_pairs_alongside_with_additional_fields) {'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 cs4=401 random.user Admin 0 23041A10181C0000 23041810181C0000 /CN\=random.user/OU\=User Login End-Entity /CN\=TEST/OU\=Login CA TEST 34 additional.dotfieldName=new_value ad.Authentification=MICROSOFT_AUTHENTICATION_PACKAGE_V1_0 ad.Error_,Code=3221225578 dst=12.121.122.82 ad.field[0]=field0 ad.name[1]=new_name'}
464
534
  it "should correctly parse random values even with additional fields in message" do
465
- subject.decode(preserve_random_values_key_value_pairs_alongside_with_additional_fields) do |e|
535
+ decode_one(subject, preserve_random_values_key_value_pairs_alongside_with_additional_fields) do |e|
466
536
  validate(e)
467
537
  insist { e.get("sourceAddress") } == "10.0.0.192"
468
538
  insist { e.get("destinationAddress") } == "12.121.122.82"
469
- insist { e.get("ad.field[0]") } == "field0"
470
- insist { e.get("ad.name[1]") } == "new_name"
539
+ insist { e.get("[ad.field][0]") } == "field0"
540
+ insist { e.get("[ad.name][1]") } == "new_name"
471
541
  insist { e.get("ad.Authentification") } == "MICROSOFT_AUTHENTICATION_PACKAGE_V1_0"
472
542
  insist { e.get("ad.Error_,Code") } == "3221225578"
473
543
  insist { e.get("additional.dotfieldName") } == "new_value"
@@ -477,7 +547,7 @@ describe LogStash::Codecs::CEF do
477
547
 
478
548
  let (:preserve_unmatched_key_mappings) {'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 new_key_by_device=new_values here'}
479
549
  it "should preserve unmatched key mappings" do
480
- subject.decode(preserve_unmatched_key_mappings) do |e|
550
+ decode_one(subject, preserve_unmatched_key_mappings) do |e|
481
551
  validate(e)
482
552
  insist { e.get("sourceAddress") } == "10.0.0.192"
483
553
  insist { e.get("destinationAddress") } == "12.121.122.82"
@@ -487,7 +557,7 @@ describe LogStash::Codecs::CEF do
487
557
 
488
558
  let (:translate_abbreviated_cef_fields) {'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 proto=TCP shost=source.host.name dhost=destination.host.name spt=11024 dpt=9200 outcome=Success amac=00:80:48:1c:24:91'}
489
559
  it "should translate most known abbreviated CEF field names" do
490
- subject.decode(translate_abbreviated_cef_fields) do |e|
560
+ decode_one(subject, translate_abbreviated_cef_fields) do |e|
491
561
  validate(e)
492
562
  insist { e.get("sourceAddress") } == "10.0.0.192"
493
563
  insist { e.get("destinationAddress") } == "12.121.122.82"
@@ -503,7 +573,7 @@ describe LogStash::Codecs::CEF do
503
573
 
504
574
  let (:syslog) { "Syslogdate Sysloghost CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=10.0.0.192 dst=12.121.122.82 spt=1232" }
505
575
  it "Should detect headers before CEF starts" do
506
- subject.decode(syslog) do |e|
576
+ decode_one(subject, syslog) do |e|
507
577
  validate(e)
508
578
  insist { e.get('syslog') } == 'Syslogdate Sysloghost'
509
579
  end
@@ -522,7 +592,7 @@ describe LogStash::Codecs::CEF do
522
592
  context "externally encoded as #{external_encoding}" do
523
593
  let(:message) { super().force_encoding(external_encoding) }
524
594
  it 'should keep the higher-plane characters' do
525
- subject.decode(message.dup) do |event|
595
+ decode_one(subject, message.dup) do |event|
526
596
  validate(event)
527
597
  insist { event.get("target") } == "aaaaaああああaaaa"
528
598
  insist { event.get("target").encoding } == Encoding::UTF_8
@@ -535,7 +605,7 @@ describe LogStash::Codecs::CEF do
535
605
  context 'non-UTF-8 message' do
536
606
  let(:message) { 'CEF:0|security|threatmanager|1.0|100|trojan successfully stopped|10|src=192.168.1.11 target=aaaaaああああaaaa msg=Description Omitted'.encode('SHIFT_JIS') }
537
607
  it 'should emit message unparsed with _cefparsefailure tag' do
538
- subject.decode(message.dup) do |event|
608
+ decode_one(subject, message.dup) do |event|
539
609
  insist { event.get("message").bytes.to_a } == message.bytes.to_a
540
610
  insist { event.get("tags") } == ['_cefparsefailure']
541
611
  end
@@ -546,7 +616,7 @@ describe LogStash::Codecs::CEF do
546
616
  subject(:codec) { LogStash::Codecs::CEF.new("raw_data_field" => "message_raw") }
547
617
 
548
618
  it "should return the raw message in field message_raw" do
549
- subject.decode(message.dup) do |e|
619
+ decode_one(subject, message.dup) do |e|
550
620
  validate(e)
551
621
  insist { e.get("message_raw") } == message
552
622
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-codec-cef
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.3
4
+ version: 5.0.4
5
5
  platform: java
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-19 00:00:00.000000000 Z
11
+ date: 2018-08-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement