logstash-codec-cef 6.0.0-java → 6.2.0-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,6 +3,9 @@ require "logstash/util/buftok"
3
3
  require "logstash/util/charset"
4
4
  require "logstash/codecs/base"
5
5
  require "json"
6
+ require "time"
7
+
8
+ require 'logstash/plugin_mixins/ecs_compatibility_support'
6
9
 
7
10
  # Implementation of a Logstash codec for the ArcSight Common Event Format (CEF)
8
11
  # Based on Revision 20 of Implementing ArcSight CEF, dated from June 05, 2013
@@ -13,6 +16,10 @@ require "json"
13
16
  class LogStash::Codecs::CEF < LogStash::Codecs::Base
14
17
  config_name "cef"
15
18
 
19
+ include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1)
20
+
21
+ InvalidTimestamp = Class.new(StandardError)
22
+
16
23
  # Device vendor field in CEF header. The new value can include `%{foo}` strings
17
24
  # to help you build a new value from other parts of the event.
18
25
  config :vendor, :validate => :string, :default => "Elasticsearch"
@@ -68,106 +75,24 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
68
75
  # * `\\n` (backslash "n") - means newline (ASCII 0x0A)
69
76
  config :delimiter, :validate => :string
70
77
 
78
+ # When parsing timestamps that do not include a UTC offset in payloads that do not
79
+ # include the device's timezone, the default timezone is used.
80
+ # If none is provided the system timezone is used.
81
+ config :default_timezone, :validate => :string
82
+
83
+ # The locale is used to parse abbreviated month names from some CEF timestamp
84
+ # formats.
85
+ # If none is provided, the system default is used.
86
+ config :locale, :validate => :string
87
+
71
88
  # If raw_data_field is set, during decode of an event an additional field with
72
89
  # the provided name is added, which contains the raw data.
73
90
  config :raw_data_field, :validate => :string
74
91
 
75
- HEADER_FIELDS = ['cefVersion','deviceVendor','deviceProduct','deviceVersion','deviceEventClassId','name','severity']
76
-
77
- # Translating and flattening the CEF extensions with known field names as documented in the Common Event Format whitepaper
78
- MAPPINGS = {
79
- "act" => "deviceAction",
80
- "app" => "applicationProtocol",
81
- "c6a1" => "deviceCustomIPv6Address1",
82
- "c6a1Label" => "deviceCustomIPv6Address1Label",
83
- "c6a2" => "deviceCustomIPv6Address2",
84
- "c6a2Label" => "deviceCustomIPv6Address2Label",
85
- "c6a3" => "deviceCustomIPv6Address3",
86
- "c6a3Label" => "deviceCustomIPv6Address3Label",
87
- "c6a4" => "deviceCustomIPv6Address4",
88
- "c6a4Label" => "deviceCustomIPv6Address4Label",
89
- "cat" => "deviceEventCategory",
90
- "cfp1" => "deviceCustomFloatingPoint1",
91
- "cfp1Label" => "deviceCustomFloatingPoint1Label",
92
- "cfp2" => "deviceCustomFloatingPoint2",
93
- "cfp2Label" => "deviceCustomFloatingPoint2Label",
94
- "cfp3" => "deviceCustomFloatingPoint3",
95
- "cfp3Label" => "deviceCustomFloatingPoint3Label",
96
- "cfp4" => "deviceCustomFloatingPoint4",
97
- "cfp4Label" => "deviceCustomFloatingPoint4Label",
98
- "cn1" => "deviceCustomNumber1",
99
- "cn1Label" => "deviceCustomNumber1Label",
100
- "cn2" => "deviceCustomNumber2",
101
- "cn2Label" => "deviceCustomNumber2Label",
102
- "cn3" => "deviceCustomNumber3",
103
- "cn3Label" => "deviceCustomNumber3Label",
104
- "cnt" => "baseEventCount",
105
- "cs1" => "deviceCustomString1",
106
- "cs1Label" => "deviceCustomString1Label",
107
- "cs2" => "deviceCustomString2",
108
- "cs2Label" => "deviceCustomString2Label",
109
- "cs3" => "deviceCustomString3",
110
- "cs3Label" => "deviceCustomString3Label",
111
- "cs4" => "deviceCustomString4",
112
- "cs4Label" => "deviceCustomString4Label",
113
- "cs5" => "deviceCustomString5",
114
- "cs5Label" => "deviceCustomString5Label",
115
- "cs6" => "deviceCustomString6",
116
- "cs6Label" => "deviceCustomString6Label",
117
- "dhost" => "destinationHostName",
118
- "dmac" => "destinationMacAddress",
119
- "dntdom" => "destinationNtDomain",
120
- "dpid" => "destinationProcessId",
121
- "dpriv" => "destinationUserPrivileges",
122
- "dproc" => "destinationProcessName",
123
- "dpt" => "destinationPort",
124
- "dst" => "destinationAddress",
125
- "duid" => "destinationUserId",
126
- "duser" => "destinationUserName",
127
- "dvc" => "deviceAddress",
128
- "dvchost" => "deviceHostName",
129
- "dvcpid" => "deviceProcessId",
130
- "end" => "endTime",
131
- "fname" => "fileName",
132
- "fsize" => "fileSize",
133
- "in" => "bytesIn",
134
- "msg" => "message",
135
- "out" => "bytesOut",
136
- "outcome" => "eventOutcome",
137
- "proto" => "transportProtocol",
138
- "request" => "requestUrl",
139
- "rt" => "deviceReceiptTime",
140
- "shost" => "sourceHostName",
141
- "smac" => "sourceMacAddress",
142
- "sntdom" => "sourceNtDomain",
143
- "spid" => "sourceProcessId",
144
- "spriv" => "sourceUserPrivileges",
145
- "sproc" => "sourceProcessName",
146
- "spt" => "sourcePort",
147
- "src" => "sourceAddress",
148
- "start" => "startTime",
149
- "suid" => "sourceUserId",
150
- "suser" => "sourceUserName",
151
- "ahost" => "agentHost",
152
- "art" => "agentReceiptTime",
153
- "at" => "agentType",
154
- "aid" => "agentId",
155
- "_cefVer" => "cefVersion",
156
- "agt" => "agentAddress",
157
- "av" => "agentVersion",
158
- "atz" => "agentTimeZone",
159
- "dtz" => "destinationTimeZone",
160
- "slong" => "sourceLongitude",
161
- "slat" => "sourceLatitude",
162
- "dlong" => "destinationLongitude",
163
- "dlat" => "destinationLatitude",
164
- "catdt" => "categoryDeviceType",
165
- "mrt" => "managerReceiptTime",
166
- "amac" => "agentMacAddress"
167
- }
168
-
169
- # Reverse mapping of CEF full field names to CEF extensions field names for encoding into a CEF event for output.
170
- REVERSE_MAPPINGS = MAPPINGS.invert
92
+ # Defines whether a set of device-specific CEF fields represent the _observer_,
93
+ # or the actual `host` on which the event occurred. If this codec handles a mix,
94
+ # it is safe to use the default `observer`.
95
+ config :device, :validate => %w(observer host), :default => 'observer'
171
96
 
172
97
  # A CEF Header is a sequence of zero or more:
173
98
  # - backslash-escaped pipes; OR
@@ -189,13 +114,16 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
189
114
  # commas, periods, and square-bracketed index offsets.
190
115
  #
191
116
  # To support this, we look for a specific sequence of characters that are followed by an equals sign. This pattern
192
- # will correctly identify all strictly-legal keys, and will also match those that include a dot "subkey"
117
+ # will correctly identify all strictly-legal keys, and will also match those that include dot-joined "subkeys" and
118
+ # square-bracketed array indexing.
193
119
  #
194
120
  # That sequence must begin with one or more `\w` (word: alphanumeric + underscore), which _optionally_ may be followed
195
- # by "subkey" sequence consisting of a literal dot (`.`) followed by a non-whitespace character, then one or more word
196
- # characters, and then one or more characters that do not convey semantic meaning within CEF (e.g., literal-pipe (`|`),
197
- # whitespace (`\s`), literal-dot (`.`), literal-equals (`=`), or literal-backslash ('\')).
198
- EXTENSION_KEY_PATTERN = /(?:\w+(?:\.[^\s]\w+[^\|\s\.\=\\]+)?(?==))/
121
+ # by one or more "subkey" sequences and an optional square-bracketed index.
122
+ #
123
+ # To be understood by this implementation, a "subkey" sequence must consist of a literal dot (`.`) followed by one or
124
+ # more characters, none of which may convey semantic meaning within CEF (i.e., none may be a literal-dot (`.`), literal-equals (`=`),
125
+ # whitespace (`\s`), literal-pipe (`|`), literal-backslash (`\`), or literal-square bracket (`[` or `]`)).
126
+ EXTENSION_KEY_PATTERN = /(?:\w+(?:\.[^\.=\s\|\\\[\]]+)*(?:\[[0-9]+\])?(?==))/
199
127
 
200
128
  # Some CEF extension keys seen in the wild use an undocumented array-like syntax that may not be compatible with
201
129
  # the Event API's strict-mode FieldReference parser (e.g., `fieldname[0]`).
@@ -212,6 +140,30 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
212
140
  # Cache of a scanner pattern that _captures_ extension field key/value pairs
213
141
  EXTENSION_KEY_VALUE_SCANNER = /(#{EXTENSION_KEY_PATTERN})=(#{EXTENSION_VALUE_PATTERN})\s*/
214
142
 
143
+ ##
144
+ # @see CEF#sanitize_header_field
145
+ HEADER_FIELD_SANITIZER_MAPPING = {
146
+ "\\" => "\\\\",
147
+ "|" => "\\|",
148
+ "\n" => " ",
149
+ "\r" => " ",
150
+ }
151
+ HEADER_FIELD_SANITIZER_PATTERN = Regexp.union(HEADER_FIELD_SANITIZER_MAPPING.keys)
152
+ private_constant :HEADER_FIELD_SANITIZER_MAPPING, :HEADER_FIELD_SANITIZER_PATTERN
153
+
154
+ ##
155
+ # @see CEF#sanitize_extension_val
156
+ EXTENSION_VALUE_SANITIZER_MAPPING = {
157
+ "\\" => "\\\\",
158
+ "=" => "\\=",
159
+ "\n" => "\\n",
160
+ "\r" => "\\n",
161
+ }
162
+ EXTENSION_VALUE_SANITIZER_PATTERN = Regexp.union(EXTENSION_VALUE_SANITIZER_MAPPING.keys)
163
+ private_constant :EXTENSION_VALUE_SANITIZER_MAPPING, :EXTENSION_VALUE_SANITIZER_PATTERN
164
+
165
+ CEF_PREFIX = 'CEF:'.freeze
166
+
215
167
  public
216
168
  def initialize(params={})
217
169
  super(params)
@@ -228,6 +180,12 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
228
180
  @delimiter = @delimiter.gsub("\\r", "\r").gsub("\\n", "\n")
229
181
  @buffer = FileWatch::BufferedTokenizer.new(@delimiter)
230
182
  end
183
+
184
+ require_relative 'cef/timestamp_normalizer'
185
+ @timestamp_normalzer = TimestampNormalizer.new(locale: @locale, timezone: @default_timezone)
186
+
187
+ generate_header_fields!
188
+ generate_mappings!
231
189
  end
232
190
 
233
191
  public
@@ -242,6 +200,7 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
242
200
  end
243
201
 
244
202
  def handle(data, &block)
203
+ original_data = data.dup
245
204
  event = LogStash::Event.new
246
205
  event.set(raw_data_field, data) unless raw_data_field.nil?
247
206
 
@@ -258,7 +217,7 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
258
217
 
259
218
  # Use a scanning parser to capture the HEADER_FIELDS
260
219
  unprocessed_data = data
261
- HEADER_FIELDS.each do |field_name|
220
+ @header_fields.each do |field_name|
262
221
  match_data = HEADER_SCANNER.match(unprocessed_data)
263
222
  break if match_data.nil? # missing fields
264
223
 
@@ -276,22 +235,24 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
276
235
  message = unprocessed_data
277
236
 
278
237
  # Try and parse out the syslog header if there is one
279
- if event.get('cefVersion').include? ' '
280
- split_cef_version= event.get('cefVersion').rpartition(' ')
281
- event.set('syslog', split_cef_version[0])
282
- event.set('cefVersion',split_cef_version[2])
238
+ cef_version_field = @header_fields[0]
239
+ if (cef_version = event.get(cef_version_field)).include?(' ')
240
+ split_cef_version = cef_version.rpartition(' ')
241
+ event.set(@syslog_header, split_cef_version[0])
242
+ event.set(cef_version_field, split_cef_version[2])
283
243
  end
284
244
 
285
245
  # Get rid of the CEF bit in the version
286
- event.set('cefVersion', event.get('cefVersion').sub(/^CEF:/, ''))
246
+ event.set(cef_version_field, delete_cef_prefix(event.get(cef_version_field)))
287
247
 
288
248
  # Use a scanning parser to capture the Extension Key/Value Pairs
289
249
  if message && message.include?('=')
290
250
  message = message.strip
251
+ extension_fields = {}
291
252
 
292
253
  message.scan(EXTENSION_KEY_VALUE_SCANNER) do |extension_field_key, raw_extension_field_value|
293
254
  # expand abbreviated extension field keys
294
- extension_field_key = MAPPINGS.fetch(extension_field_key, extension_field_key)
255
+ extension_field_key = @decode_mapping.fetch(extension_field_key, extension_field_key)
295
256
 
296
257
  # convert extension field name to strict legal field_reference, fixing field names with ambiguous array-like syntax
297
258
  extension_field_key = extension_field_key.sub(EXTENSION_KEY_ARRAY_CAPTURE, '[\1]\2') if extension_field_key.end_with?(']')
@@ -299,13 +260,28 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
299
260
  # process legal extension field value escapes
300
261
  extension_field_value = raw_extension_field_value.gsub(EXTENSION_VALUE_ESCAPE_CAPTURE, '\1')
301
262
 
302
- event.set(extension_field_key, extension_field_value)
263
+ extension_fields[extension_field_key] = extension_field_value
264
+ end
265
+
266
+ # in ECS mode, normalize timestamps including timezone.
267
+ if ecs_compatibility != :disabled
268
+ device_timezone = extension_fields['[event][timezone]']
269
+ @timestamp_fields.each do |timestamp_field_name|
270
+ raw_timestamp = extension_fields.delete(timestamp_field_name) or next
271
+ value = normalize_timestamp(raw_timestamp, device_timezone)
272
+ event.set(timestamp_field_name, value)
273
+ end
274
+ end
275
+
276
+ extension_fields.each do |field_key, field_value|
277
+ event.set(field_key, field_value)
303
278
  end
304
279
  end
305
280
 
306
281
  yield event
307
282
  rescue => e
308
- @logger.error("Failed to decode CEF payload. Generating failure event with payload in message field.", :error => e.message, :backtrace => e.backtrace, :data => data)
283
+ @logger.error("Failed to decode CEF payload. Generating failure event with payload in message field.",
284
+ :exception => e.class, :message => e.message, :backtrace => e.backtrace, :original_data => original_data)
309
285
  yield LogStash::Event.new("message" => data, "tags" => ["_cefparsefailure"])
310
286
  end
311
287
 
@@ -314,79 +290,290 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
314
290
  # "CEF:0|Elasticsearch|Logstash|1.0|Signature|Name|Sev|"
315
291
 
316
292
  vendor = sanitize_header_field(event.sprintf(@vendor))
317
- vendor = self.class.get_config["vendor"][:default] if vendor == ""
293
+ vendor = self.class.get_config["vendor"][:default] if vendor.empty?
318
294
 
319
295
  product = sanitize_header_field(event.sprintf(@product))
320
- product = self.class.get_config["product"][:default] if product == ""
296
+ product = self.class.get_config["product"][:default] if product.empty?
321
297
 
322
298
  version = sanitize_header_field(event.sprintf(@version))
323
- version = self.class.get_config["version"][:default] if version == ""
299
+ version = self.class.get_config["version"][:default] if version.empty?
324
300
 
325
301
  signature = sanitize_header_field(event.sprintf(@signature))
326
- signature = self.class.get_config["signature"][:default] if signature == ""
302
+ signature = self.class.get_config["signature"][:default] if signature.empty?
327
303
 
328
304
  name = sanitize_header_field(event.sprintf(@name))
329
- name = self.class.get_config["name"][:default] if name == ""
305
+ name = self.class.get_config["name"][:default] if name.empty?
330
306
 
331
307
  severity = sanitize_severity(event, @severity)
332
308
 
333
309
  # Should also probably set the fields sent
334
310
  header = ["CEF:0", vendor, product, version, signature, name, severity].join("|")
335
- values = @fields.map {|fieldname| get_value(fieldname, event)}.compact.join(" ")
311
+ values = @fields.map { |fieldname| get_value(fieldname, event) }.compact.join(" ")
336
312
 
337
313
  @on_event.call(event, "#{header}|#{values}#{@delimiter}")
338
314
  end
339
315
 
340
316
  private
341
317
 
318
+ def generate_header_fields!
319
+ # @header_fields is an _ordered_ list of the event field names that receive the seven CEF header values; index 0 is the CEF version.
320
+ @header_fields = [
321
+ ecs_select[disabled: 'cefVersion', v1: '[cef][version]'],
322
+ ecs_select[disabled: 'deviceVendor', v1: '[observer][vendor]'],
323
+ ecs_select[disabled: 'deviceProduct', v1: '[observer][product]'],
324
+ ecs_select[disabled: 'deviceVersion', v1: '[observer][version]'],
325
+ ecs_select[disabled: 'deviceEventClassId', v1: '[event][code]'],
326
+ ecs_select[disabled: 'name', v1: '[cef][name]'],
327
+ ecs_select[disabled: 'severity', v1: '[event][severity]']
328
+ ].map(&:freeze).freeze
329
+ # the @syslog_header is the field name used when a syslog header precedes the CEF Version.
330
+ @syslog_header = ecs_select[disabled:'syslog',v1:'[log][syslog][header]']
331
+ end
332
+
333
+ class CEFField
334
+ ##
335
+ # @param name [String]: the full CEF name of a field
336
+ # @param key [String] (optional): an abbreviated CEF key to use when encoding a value with `reverse_mapping => true`
337
+ # when left unspecified, the `key` is the field's `name`.
338
+ # @param ecs_field [String] (optional): an ECS-compatible field reference to use, with square-bracket syntax.
339
+ # when left unspecified, the `ecs_field` is the field's `name`.
340
+ # @param legacy [String] (optional): a legacy CEF name to support in pass-through.
341
+ # in decoding mode without ECS, field name will be used as-provided.
342
+ # in encoding mode without ECS when provided to `fields` and `reverse_mapping => false`,
343
+ # field name will be used as-provided.
344
+ # @param priority [Integer] (optional): when multiple fields resolve to the same ECS field name, the field with the
345
+ # highest `priority` will be used by the encoder.
346
+ def initialize(name, key: name, ecs_field: name, legacy:nil, priority:0, normalize:nil)
347
+ @name = name
348
+ @key = key
349
+ @ecs_field = ecs_field
350
+ @legacy = legacy
351
+ @priority = priority
352
+ @normalize = normalize # `:timestamp` marks the field for timestamp normalization when ECS compatibility is enabled
353
+ end
354
+ attr_reader :name
355
+ attr_reader :key
356
+ attr_reader :ecs_field
357
+ attr_reader :legacy
358
+ attr_reader :priority
359
+ attr_reader :normalize
360
+ end
361
+
362
##
# Builds the lookup tables that translate between CEF extension names and event field names.
#
# Populates three frozen instance variables:
#  - `@decode_mapping`:   CEF key, full CEF name, or legacy CEF name => event field name
#  - `@encode_mapping`:   event field name, full CEF name, or legacy CEF name => CEF key
#                         (when `@reverse_mapping` is set) or CEF name
#  - `@timestamp_fields`: event field names whose definition carries `normalize: :timestamp`;
#                         only populated when ECS compatibility is enabled
#
# The event field name is the ECS field reference in ECS mode, and the full CEF name otherwise.
# Definitions are applied in ascending `priority` order and later assignments overwrite earlier
# ones, so when several CEF fields share one ECS field the highest-priority one wins the encode mapping.
def generate_mappings!
  encode_mapping = {}
  decode_mapping = {}
  timestamp_fields = Set.new

  field_definitions = [
    CEFField.new("agentAddress", key: "agt", ecs_field: "[agent][ip]"),
    CEFField.new("agentDnsDomain", ecs_field: "[cef][agent][registered_domain]", priority: 10),
    CEFField.new("agentHostName", key: "ahost", ecs_field: "[agent][name]"),
    CEFField.new("agentId", key: "aid", ecs_field: "[agent][id]"),
    CEFField.new("agentMacAddress", key: "amac", ecs_field: "[agent][mac]"),
    CEFField.new("agentNtDomain", ecs_field: "[cef][agent][registered_domain]"),
    CEFField.new("agentReceiptTime", key: "art", ecs_field: "[event][created]", normalize: :timestamp),
    CEFField.new("agentTimeZone", key: "atz", ecs_field: "[cef][agent][timezone]"),
    CEFField.new("agentTranslatedAddress", ecs_field: "[cef][agent][nat][ip]"),
    CEFField.new("agentTranslatedZoneExternalID", ecs_field: "[cef][agent][translated_zone][external_id]"),
    CEFField.new("agentTranslatedZoneURI", ecs_field: "[cef][agent][translated_zone][uri]"),
    CEFField.new("agentType", key: "at", ecs_field: "[agent][type]"),
    CEFField.new("agentVersion", key: "av", ecs_field: "[agent][version]"),
    CEFField.new("agentZoneExternalID", ecs_field: "[cef][agent][zone][external_id]"),
    CEFField.new("agentZoneURI", ecs_field: "[cef][agent][zone][uri]"),
    CEFField.new("applicationProtocol", key: "app", ecs_field: "[network][protocol]"),
    CEFField.new("baseEventCount", key: "cnt", ecs_field: "[cef][base_event_count]"),
    CEFField.new("bytesIn", key: "in", ecs_field: "[source][bytes]"),
    CEFField.new("bytesOut", key: "out", ecs_field: "[destination][bytes]"),
    CEFField.new("categoryDeviceType", key: "catdt", ecs_field: "[cef][device_type]"),
    CEFField.new("customerExternalID", ecs_field: "[organization][id]"),
    CEFField.new("customerURI", ecs_field: "[organization][name]"),
    CEFField.new("destinationAddress", key: "dst", ecs_field: "[destination][ip]"),
    CEFField.new("destinationDnsDomain", ecs_field: "[destination][registered_domain]", priority: 10),
    CEFField.new("destinationGeoLatitude", key: "dlat", ecs_field: "[destination][geo][location][lat]", legacy: "destinationLatitude"),
    CEFField.new("destinationGeoLongitude", key: "dlong", ecs_field: "[destination][geo][location][lon]", legacy: "destinationLongitude"),
    CEFField.new("destinationHostName", key: "dhost", ecs_field: "[destination][domain]"),
    CEFField.new("destinationMacAddress", key: "dmac", ecs_field: "[destination][mac]"),
    CEFField.new("destinationNtDomain", key: "dntdom", ecs_field: "[destination][registered_domain]"),
    CEFField.new("destinationPort", key: "dpt", ecs_field: "[destination][port]"),
    CEFField.new("destinationProcessId", key: "dpid", ecs_field: "[destination][process][pid]"),
    CEFField.new("destinationProcessName", key: "dproc", ecs_field: "[destination][process][name]"),
    CEFField.new("destinationServiceName", ecs_field: "[destination][service][name]"),
    CEFField.new("destinationTranslatedAddress", ecs_field: "[destination][nat][ip]"),
    CEFField.new("destinationTranslatedPort", ecs_field: "[destination][nat][port]"),
    CEFField.new("destinationTranslatedZoneExternalID", ecs_field: "[cef][destination][translated_zone][external_id]"),
    CEFField.new("destinationTranslatedZoneURI", ecs_field: "[cef][destination][translated_zone][uri]"),
    CEFField.new("destinationUserId", key: "duid", ecs_field: "[destination][user][id]"),
    CEFField.new("destinationUserName", key: "duser", ecs_field: "[destination][user][name]"),
    CEFField.new("destinationUserPrivileges", key: "dpriv", ecs_field: "[destination][user][group][name]"),
    CEFField.new("destinationZoneExternalID", ecs_field: "[cef][destination][zone][external_id]"),
    CEFField.new("destinationZoneURI", ecs_field: "[cef][destination][zone][uri]"),
    CEFField.new("deviceAction", key: "act", ecs_field: "[event][action]"),
    CEFField.new("deviceAddress", key: "dvc", ecs_field: "[#{@device}][ip]"),
    CEFField.new("deviceCustomFloatingPoint1", key: "cfp1", ecs_field: "[cef][device_custom_floating_point_1][value]"),
    CEFField.new("deviceCustomFloatingPoint1Label", key: "cfp1Label", ecs_field: "[cef][device_custom_floating_point_1][label]"),
    CEFField.new("deviceCustomFloatingPoint2", key: "cfp2", ecs_field: "[cef][device_custom_floating_point_2][value]"),
    CEFField.new("deviceCustomFloatingPoint2Label", key: "cfp2Label", ecs_field: "[cef][device_custom_floating_point_2][label]"),
    CEFField.new("deviceCustomFloatingPoint3", key: "cfp3", ecs_field: "[cef][device_custom_floating_point_3][value]"),
    CEFField.new("deviceCustomFloatingPoint3Label", key: "cfp3Label", ecs_field: "[cef][device_custom_floating_point_3][label]"),
    CEFField.new("deviceCustomFloatingPoint4", key: "cfp4", ecs_field: "[cef][device_custom_floating_point_4][value]"),
    CEFField.new("deviceCustomFloatingPoint4Label", key: "cfp4Label", ecs_field: "[cef][device_custom_floating_point_4][label]"),
    CEFField.new("deviceCustomIPv6Address1", key: "c6a1", ecs_field: "[cef][device_custom_ipv6_address_1][value]"),
    CEFField.new("deviceCustomIPv6Address1Label", key: "c6a1Label", ecs_field: "[cef][device_custom_ipv6_address_1][label]"),
    CEFField.new("deviceCustomIPv6Address2", key: "c6a2", ecs_field: "[cef][device_custom_ipv6_address_2][value]"),
    CEFField.new("deviceCustomIPv6Address2Label", key: "c6a2Label", ecs_field: "[cef][device_custom_ipv6_address_2][label]"),
    CEFField.new("deviceCustomIPv6Address3", key: "c6a3", ecs_field: "[cef][device_custom_ipv6_address_3][value]"),
    CEFField.new("deviceCustomIPv6Address3Label", key: "c6a3Label", ecs_field: "[cef][device_custom_ipv6_address_3][label]"),
    CEFField.new("deviceCustomIPv6Address4", key: "c6a4", ecs_field: "[cef][device_custom_ipv6_address_4][value]"),
    CEFField.new("deviceCustomIPv6Address4Label", key: "c6a4Label", ecs_field: "[cef][device_custom_ipv6_address_4][label]"),
    CEFField.new("deviceCustomNumber1", key: "cn1", ecs_field: "[cef][device_custom_number_1][value]"),
    CEFField.new("deviceCustomNumber1Label", key: "cn1Label", ecs_field: "[cef][device_custom_number_1][label]"),
    CEFField.new("deviceCustomNumber2", key: "cn2", ecs_field: "[cef][device_custom_number_2][value]"),
    CEFField.new("deviceCustomNumber2Label", key: "cn2Label", ecs_field: "[cef][device_custom_number_2][label]"),
    CEFField.new("deviceCustomNumber3", key: "cn3", ecs_field: "[cef][device_custom_number_3][value]"),
    CEFField.new("deviceCustomNumber3Label", key: "cn3Label", ecs_field: "[cef][device_custom_number_3][label]"),
    CEFField.new("deviceCustomString1", key: "cs1", ecs_field: "[cef][device_custom_string_1][value]"),
    CEFField.new("deviceCustomString1Label", key: "cs1Label", ecs_field: "[cef][device_custom_string_1][label]"),
    CEFField.new("deviceCustomString2", key: "cs2", ecs_field: "[cef][device_custom_string_2][value]"),
    CEFField.new("deviceCustomString2Label", key: "cs2Label", ecs_field: "[cef][device_custom_string_2][label]"),
    CEFField.new("deviceCustomString3", key: "cs3", ecs_field: "[cef][device_custom_string_3][value]"),
    CEFField.new("deviceCustomString3Label", key: "cs3Label", ecs_field: "[cef][device_custom_string_3][label]"),
    CEFField.new("deviceCustomString4", key: "cs4", ecs_field: "[cef][device_custom_string_4][value]"),
    CEFField.new("deviceCustomString4Label", key: "cs4Label", ecs_field: "[cef][device_custom_string_4][label]"),
    CEFField.new("deviceCustomString5", key: "cs5", ecs_field: "[cef][device_custom_string_5][value]"),
    CEFField.new("deviceCustomString5Label", key: "cs5Label", ecs_field: "[cef][device_custom_string_5][label]"),
    CEFField.new("deviceCustomString6", key: "cs6", ecs_field: "[cef][device_custom_string_6][value]"),
    CEFField.new("deviceCustomString6Label", key: "cs6Label", ecs_field: "[cef][device_custom_string_6][label]"),
    CEFField.new("deviceDirection", ecs_field: "[network][direction]"),
    CEFField.new("deviceDnsDomain", ecs_field: "[#{@device}][registered_domain]", priority: 10),
    CEFField.new("deviceEventCategory", key: "cat", ecs_field: "[cef][category]"),
    CEFField.new("deviceExternalId", ecs_field: (@device == 'host' ? "[host][id]" : "[observer][name]")),
    CEFField.new("deviceFacility", ecs_field: "[log][syslog][facility][code]"),
    CEFField.new("deviceHostName", key: "dvchost", ecs_field: (@device == 'host' ? '[host][name]' : '[observer][hostname]')),
    CEFField.new("deviceInboundInterface", ecs_field: "[observer][ingress][interface][name]"),
    # FIX: interpolate the configured `device` role; the literal "[@device][mac]" wrote to a bogus `@device` field.
    CEFField.new("deviceMacAddress", key: "dvcmac", ecs_field: "[#{@device}][mac]"),
    CEFField.new("deviceNtDomain", ecs_field: "[cef][nt_domain]"),
    CEFField.new("deviceOutboundInterface", ecs_field: "[observer][egress][interface][name]"),
    CEFField.new("devicePayloadId", ecs_field: "[cef][payload_id]"),
    CEFField.new("deviceProcessId", key: "dvcpid", ecs_field: "[process][pid]"),
    CEFField.new("deviceProcessName", ecs_field: "[process][name]"),
    CEFField.new("deviceReceiptTime", key: "rt", ecs_field: "@timestamp", normalize: :timestamp),
    CEFField.new("deviceTimeZone", key: "dtz", ecs_field: "[event][timezone]", legacy: "destinationTimeZone"),
    CEFField.new("deviceTranslatedAddress", ecs_field: "[host][nat][ip]"),
    CEFField.new("deviceTranslatedZoneExternalID", ecs_field: "[cef][translated_zone][external_id]"),
    CEFField.new("deviceTranslatedZoneURI", ecs_field: "[cef][translated_zone][uri]"),
    CEFField.new("deviceVersion", ecs_field: "[observer][version]"),
    CEFField.new("deviceZoneExternalID", ecs_field: "[cef][zone][external_id]"),
    CEFField.new("deviceZoneURI", ecs_field: "[cef][zone][uri]"),
    CEFField.new("endTime", key: "end", ecs_field: "[event][end]", normalize: :timestamp),
    CEFField.new("eventId", ecs_field: "[event][id]"),
    CEFField.new("eventOutcome", key: "outcome", ecs_field: "[event][outcome]"),
    CEFField.new("externalId", ecs_field: "[cef][external_id]"),
    # FIX (consistency): normalized like `oldFileCreateTime` and `fileModificationTime`.
    CEFField.new("fileCreateTime", ecs_field: "[file][created]", normalize: :timestamp),
    # FIX: removed the stray trailing `]` that made this an invalid field reference.
    CEFField.new("fileHash", ecs_field: "[file][hash]"),
    CEFField.new("fileId", ecs_field: "[file][inode]"),
    CEFField.new("fileModificationTime", ecs_field: "[file][mtime]", normalize: :timestamp),
    CEFField.new("fileName", key: "fname", ecs_field: "[file][name]"),
    CEFField.new("filePath", ecs_field: "[file][path]"),
    CEFField.new("filePermission", ecs_field: "[file][group]"),
    CEFField.new("fileSize", key: "fsize", ecs_field: "[file][size]"),
    CEFField.new("fileType", ecs_field: "[file][extension]"),
    CEFField.new("managerReceiptTime", key: "mrt", ecs_field: "[event][ingested]", normalize: :timestamp),
    CEFField.new("message", key: "msg", ecs_field: "[message]"),
    CEFField.new("oldFileCreateTime", ecs_field: "[cef][old_file][created]", normalize: :timestamp),
    CEFField.new("oldFileHash", ecs_field: "[cef][old_file][hash]"),
    CEFField.new("oldFileId", ecs_field: "[cef][old_file][inode]"),
    CEFField.new("oldFileModificationTime", ecs_field: "[cef][old_file][mtime]", normalize: :timestamp),
    CEFField.new("oldFileName", ecs_field: "[cef][old_file][name]"),
    CEFField.new("oldFilePath", ecs_field: "[cef][old_file][path]"),
    CEFField.new("oldFilePermission", ecs_field: "[cef][old_file][group]"),
    CEFField.new("oldFileSize", ecs_field: "[cef][old_file][size]"),
    CEFField.new("oldFileType", ecs_field: "[cef][old_file][extension]"),
    CEFField.new("rawEvent", ecs_field: "[event][original]"),
    CEFField.new("Reason", key: "reason", ecs_field: "[event][reason]"),
    CEFField.new("requestClientApplication", ecs_field: "[user_agent][original]"),
    CEFField.new("requestContext", ecs_field: "[http][request][referrer]"),
    CEFField.new("requestCookies", ecs_field: "[cef][request][cookies]"),
    CEFField.new("requestMethod", ecs_field: "[http][request][method]"),
    CEFField.new("requestUrl", key: "request", ecs_field: "[url][original]"),
    CEFField.new("sourceAddress", key: "src", ecs_field: "[source][ip]"),
    CEFField.new("sourceDnsDomain", ecs_field: "[source][registered_domain]", priority: 10),
    CEFField.new("sourceGeoLatitude", key: "slat", ecs_field: "[source][geo][location][lat]", legacy: "sourceLatitude"),
    CEFField.new("sourceGeoLongitude", key: "slong", ecs_field: "[source][geo][location][lon]", legacy: "sourceLongitude"),
    CEFField.new("sourceHostName", key: "shost", ecs_field: "[source][domain]"),
    CEFField.new("sourceMacAddress", key: "smac", ecs_field: "[source][mac]"),
    CEFField.new("sourceNtDomain", key: "sntdom", ecs_field: "[source][registered_domain]"),
    CEFField.new("sourcePort", key: "spt", ecs_field: "[source][port]"),
    CEFField.new("sourceProcessId", key: "spid", ecs_field: "[source][process][pid]"),
    CEFField.new("sourceProcessName", key: "sproc", ecs_field: "[source][process][name]"),
    CEFField.new("sourceServiceName", ecs_field: "[source][service][name]"),
    CEFField.new("sourceTranslatedAddress", ecs_field: "[source][nat][ip]"),
    CEFField.new("sourceTranslatedPort", ecs_field: "[source][nat][port]"),
    CEFField.new("sourceTranslatedZoneExternalID", ecs_field: "[cef][source][translated_zone][external_id]"),
    CEFField.new("sourceTranslatedZoneURI", ecs_field: "[cef][source][translated_zone][uri]"),
    CEFField.new("sourceUserId", key: "suid", ecs_field: "[source][user][id]"),
    CEFField.new("sourceUserName", key: "suser", ecs_field: "[source][user][name]"),
    CEFField.new("sourceUserPrivileges", key: "spriv", ecs_field: "[source][user][group][name]"),
    CEFField.new("sourceZoneExternalID", ecs_field: "[cef][source][zone][external_id]"),
    CEFField.new("sourceZoneURI", ecs_field: "[cef][source][zone][uri]"),
    CEFField.new("startTime", key: "start", ecs_field: "[event][start]", normalize: :timestamp),
    CEFField.new("transportProtocol", key: "proto", ecs_field: "[network][transport]"),
    CEFField.new("type", ecs_field: "[cef][type]"),
  ]

  # ascending priority: higher-priority definitions are applied last and therefore win on collisions
  field_definitions.sort_by(&:priority).each do |cef|
    field_name = ecs_select[disabled: cef.name, v1: cef.ecs_field]

    # whether the source is a cef_key or cef_name, normalize to field_name
    decode_mapping[cef.key] = field_name
    decode_mapping[cef.name] = field_name

    # whether source is a cef_name or a field_name, normalize to target
    normalized_encode_target = @reverse_mapping ? cef.key : cef.name
    encode_mapping[field_name] = normalized_encode_target
    encode_mapping[cef.name] = normalized_encode_target unless cef.name == field_name

    # if a field has an alias, normalize pass-through
    if cef.legacy
      decode_mapping[cef.legacy] = ecs_select[disabled: cef.legacy, v1: cef.ecs_field]
      encode_mapping[cef.legacy] = @reverse_mapping ? cef.key : cef.legacy
    end

    timestamp_fields << field_name if ecs_compatibility != :disabled && cef.normalize == :timestamp
  end

  @decode_mapping = decode_mapping.freeze
  @encode_mapping = encode_mapping.freeze
  @timestamp_fields = timestamp_fields.freeze
end
545
+
342
546
  # Escape pipes and backslashes in the header. Equal signs are ok.
343
547
  # Newlines are forbidden.
344
548
  def sanitize_header_field(value)
345
- output = ""
346
-
347
- value = value.to_s.gsub(/\r\n/, "\n")
348
-
349
- value.each_char{|c|
350
- case c
351
- when "\\", "|"
352
- output += "\\" + c
353
- when "\n", "\r"
354
- output += " "
355
- else
356
- output += c
357
- end
358
- }
359
-
360
- return output
549
+ value.to_s
550
+ .gsub("\r\n", "\n")
551
+ .gsub(HEADER_FIELD_SANITIZER_PATTERN, HEADER_FIELD_SANITIZER_MAPPING)
361
552
  end
362
553
 
363
554
  # Keys must be made up of a single word, with no spaces
364
555
  # must be alphanumeric
365
556
  def sanitize_extension_key(value)
366
- value = value.to_s.gsub(/[^a-zA-Z0-9]/, "")
367
- return value
557
+ value.to_s
558
+ .gsub(/[^a-zA-Z0-9]/, "")
368
559
  end
369
560
 
370
561
  # Escape equal signs in the extensions. Canonicalize newlines.
371
562
  # CEF spec leaves it up to us to choose \r or \n for newline.
372
563
  # We choose \n as the default.
373
564
  def sanitize_extension_val(value)
374
- output = ""
375
-
376
- value = value.to_s.gsub(/\r\n/, "\n")
377
-
378
- value.each_char{|c|
379
- case c
380
- when "\\", "="
381
- output += "\\" + c
382
- when "\n", "\r"
383
- output += "\\n"
384
- else
385
- output += c
386
- end
387
- }
565
+ value.to_s
566
+ .gsub("\r\n", "\n")
567
+ .gsub(EXTENSION_VALUE_SANITIZER_PATTERN, EXTENSION_VALUE_SANITIZER_MAPPING)
568
+ end
569
+
570
+ def normalize_timestamp(value, device_timezone_name)
571
+ value = @timestamp_normalzer.normalize(value, device_timezone_name).iso8601(9)
388
572
 
389
- return output
573
+ LogStash::Timestamp.new(value)
574
+ rescue => e
575
+ @logger.error("Failed to parse CEF timestamp value `#{value}` (#{e.message})")
576
+ raise InvalidTimestamp.new("Not a valid CEF timestamp: `#{value}`")
390
577
  end
391
578
 
392
579
  def get_value(fieldname, event)
@@ -394,12 +581,9 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
394
581
 
395
582
  return nil if val.nil?
396
583
 
397
- key = sanitize_extension_key(fieldname)
398
-
399
- if @reverse_mapping
400
- key = REVERSE_MAPPINGS[key] || key
401
- end
402
-
584
+ key = @encode_mapping.fetch(fieldname, fieldname)
585
+ key = sanitize_extension_key(key)
586
+
403
587
  case val
404
588
  when Array, Hash
405
589
  return "#{key}=#{sanitize_extension_val(val.to_json)}"
@@ -413,7 +597,7 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
413
597
  def sanitize_severity(event, severity)
414
598
  severity = sanitize_header_field(event.sprintf(severity)).strip
415
599
  severity = self.class.get_config["severity"][:default] unless valid_severity?(severity)
416
- severity = severity.to_i.to_s
600
+ severity.to_i.to_s
417
601
  end
418
602
 
419
603
  def valid_severity?(sev)
@@ -424,4 +608,14 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
424
608
  rescue TypeError, ArgumentError
425
609
  false
426
610
  end
611
+
612
##
# Removes a single leading `CEF_PREFIX` ("CEF:") from the CEF version header value.
# The value is returned unchanged when it does not start with the prefix.
#
# The implementation is selected once, when the class body is evaluated: `String#delete_prefix`
# is used where the runtime provides it (detected directly rather than inferred from
# RUBY_VERSION), with a manual slice as the fallback.
#
# @param cef_version [String] the raw version field, e.g. "CEF:0"
# @return [String] the version without the prefix, e.g. "0"
if String.method_defined?(:delete_prefix)
  def delete_cef_prefix(cef_version)
    cef_version.delete_prefix(CEF_PREFIX)
  end
else
  def delete_cef_prefix(cef_version)
    cef_version.start_with?(CEF_PREFIX) ? cef_version[CEF_PREFIX.length..-1] : cef_version
  end
end
427
621
  end