logstash-codec-cef 6.0.0-java → 6.2.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,9 @@ require "logstash/util/buftok"
3
3
  require "logstash/util/charset"
4
4
  require "logstash/codecs/base"
5
5
  require "json"
6
+ require "time"
7
+
8
+ require 'logstash/plugin_mixins/ecs_compatibility_support'
6
9
 
7
10
  # Implementation of a Logstash codec for the ArcSight Common Event Format (CEF)
8
11
  # Based on Revision 20 of Implementing ArcSight CEF, dated from June 05, 2013
@@ -13,6 +16,10 @@ require "json"
13
16
  class LogStash::Codecs::CEF < LogStash::Codecs::Base
14
17
  config_name "cef"
15
18
 
19
+ include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1)
20
+
21
+ InvalidTimestamp = Class.new(StandardError)
22
+
16
23
  # Device vendor field in CEF header. The new value can include `%{foo}` strings
17
24
  # to help you build a new value from other parts of the event.
18
25
  config :vendor, :validate => :string, :default => "Elasticsearch"
@@ -68,106 +75,24 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
68
75
  # * `\\n` (backslash "n") - means newline (ASCII 0x0A)
69
76
  config :delimiter, :validate => :string
70
77
 
78
+ # When parsing timestamps that do not include a UTC offset in payloads that do not
79
+ # include the device's timezone, the default timezone is used.
80
+ # If none is provided the system timezone is used.
81
+ config :default_timezone, :validate => :string
82
+
83
+ # The locale is used to parse abbreviated month names from some CEF timestamp
84
+ # formats.
85
+ # If none is provided, the system default is used.
86
+ config :locale, :validate => :string
87
+
71
88
  # If raw_data_field is set, during decode of an event an additional field with
72
89
  # the provided name is added, which contains the raw data.
73
90
  config :raw_data_field, :validate => :string
74
91
 
75
- HEADER_FIELDS = ['cefVersion','deviceVendor','deviceProduct','deviceVersion','deviceEventClassId','name','severity']
76
-
77
- # Translating and flattening the CEF extensions with known field names as documented in the Common Event Format whitepaper
78
- MAPPINGS = {
79
- "act" => "deviceAction",
80
- "app" => "applicationProtocol",
81
- "c6a1" => "deviceCustomIPv6Address1",
82
- "c6a1Label" => "deviceCustomIPv6Address1Label",
83
- "c6a2" => "deviceCustomIPv6Address2",
84
- "c6a2Label" => "deviceCustomIPv6Address2Label",
85
- "c6a3" => "deviceCustomIPv6Address3",
86
- "c6a3Label" => "deviceCustomIPv6Address3Label",
87
- "c6a4" => "deviceCustomIPv6Address4",
88
- "c6a4Label" => "deviceCustomIPv6Address4Label",
89
- "cat" => "deviceEventCategory",
90
- "cfp1" => "deviceCustomFloatingPoint1",
91
- "cfp1Label" => "deviceCustomFloatingPoint1Label",
92
- "cfp2" => "deviceCustomFloatingPoint2",
93
- "cfp2Label" => "deviceCustomFloatingPoint2Label",
94
- "cfp3" => "deviceCustomFloatingPoint3",
95
- "cfp3Label" => "deviceCustomFloatingPoint3Label",
96
- "cfp4" => "deviceCustomFloatingPoint4",
97
- "cfp4Label" => "deviceCustomFloatingPoint4Label",
98
- "cn1" => "deviceCustomNumber1",
99
- "cn1Label" => "deviceCustomNumber1Label",
100
- "cn2" => "deviceCustomNumber2",
101
- "cn2Label" => "deviceCustomNumber2Label",
102
- "cn3" => "deviceCustomNumber3",
103
- "cn3Label" => "deviceCustomNumber3Label",
104
- "cnt" => "baseEventCount",
105
- "cs1" => "deviceCustomString1",
106
- "cs1Label" => "deviceCustomString1Label",
107
- "cs2" => "deviceCustomString2",
108
- "cs2Label" => "deviceCustomString2Label",
109
- "cs3" => "deviceCustomString3",
110
- "cs3Label" => "deviceCustomString3Label",
111
- "cs4" => "deviceCustomString4",
112
- "cs4Label" => "deviceCustomString4Label",
113
- "cs5" => "deviceCustomString5",
114
- "cs5Label" => "deviceCustomString5Label",
115
- "cs6" => "deviceCustomString6",
116
- "cs6Label" => "deviceCustomString6Label",
117
- "dhost" => "destinationHostName",
118
- "dmac" => "destinationMacAddress",
119
- "dntdom" => "destinationNtDomain",
120
- "dpid" => "destinationProcessId",
121
- "dpriv" => "destinationUserPrivileges",
122
- "dproc" => "destinationProcessName",
123
- "dpt" => "destinationPort",
124
- "dst" => "destinationAddress",
125
- "duid" => "destinationUserId",
126
- "duser" => "destinationUserName",
127
- "dvc" => "deviceAddress",
128
- "dvchost" => "deviceHostName",
129
- "dvcpid" => "deviceProcessId",
130
- "end" => "endTime",
131
- "fname" => "fileName",
132
- "fsize" => "fileSize",
133
- "in" => "bytesIn",
134
- "msg" => "message",
135
- "out" => "bytesOut",
136
- "outcome" => "eventOutcome",
137
- "proto" => "transportProtocol",
138
- "request" => "requestUrl",
139
- "rt" => "deviceReceiptTime",
140
- "shost" => "sourceHostName",
141
- "smac" => "sourceMacAddress",
142
- "sntdom" => "sourceNtDomain",
143
- "spid" => "sourceProcessId",
144
- "spriv" => "sourceUserPrivileges",
145
- "sproc" => "sourceProcessName",
146
- "spt" => "sourcePort",
147
- "src" => "sourceAddress",
148
- "start" => "startTime",
149
- "suid" => "sourceUserId",
150
- "suser" => "sourceUserName",
151
- "ahost" => "agentHost",
152
- "art" => "agentReceiptTime",
153
- "at" => "agentType",
154
- "aid" => "agentId",
155
- "_cefVer" => "cefVersion",
156
- "agt" => "agentAddress",
157
- "av" => "agentVersion",
158
- "atz" => "agentTimeZone",
159
- "dtz" => "destinationTimeZone",
160
- "slong" => "sourceLongitude",
161
- "slat" => "sourceLatitude",
162
- "dlong" => "destinationLongitude",
163
- "dlat" => "destinationLatitude",
164
- "catdt" => "categoryDeviceType",
165
- "mrt" => "managerReceiptTime",
166
- "amac" => "agentMacAddress"
167
- }
168
-
169
- # Reverse mapping of CEF full field names to CEF extensions field names for encoding into a CEF event for output.
170
- REVERSE_MAPPINGS = MAPPINGS.invert
92
+ # Defines whether a set of device-specific CEF fields represent the _observer_,
93
+ # or the actual `host` on which the event occurred. If this codec handles a mix,
94
+ # it is safe to use the default `observer`.
95
+ config :device, :validate => %w(observer host), :default => 'observer'
171
96
 
172
97
  # A CEF Header is a sequence of zero or more:
173
98
  # - backslash-escaped pipes; OR
@@ -189,13 +114,16 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
189
114
  # commas, periods, and square-bracketed index offsets.
190
115
  #
191
116
  # To support this, we look for a specific sequence of characters that are followed by an equals sign. This pattern
192
- # will correctly identify all strictly-legal keys, and will also match those that include a dot "subkey"
117
+ # will correctly identify all strictly-legal keys, and will also match those that include dot-joined "subkeys" and
118
+ # square-bracketed array indexing
193
119
  #
194
120
  # That sequence must begin with one or more `\w` (word: alphanumeric + underscore), which _optionally_ may be followed
195
- # by "subkey" sequence consisting of a literal dot (`.`) followed by a non-whitespace character, then one or more word
196
- # characters, and then one or more characters that do not convey semantic meaning within CEF (e.g., literal-pipe (`|`),
197
- # whitespace (`\s`), literal-dot (`.`), literal-equals (`=`), or literal-backslash ('\')).
198
- EXTENSION_KEY_PATTERN = /(?:\w+(?:\.[^\s]\w+[^\|\s\.\=\\]+)?(?==))/
121
+ # by one or more "subkey" sequences and an optional square-bracketed index.
122
+ #
123
+ # To be understood by this implementation, a "subkey" sequence must consist of a literal dot (`.`) followed by one or
124
+ # more characters that do not convey semantic meaning within CEF (e.g., literal-dot (`.`), literal-equals (`=`),
125
+ # whitespace (`\s`), literal-pipe (`|`), literal-backslash ('\'), or literal-square brackets (`[` or `]`)).
126
+ EXTENSION_KEY_PATTERN = /(?:\w+(?:\.[^\.=\s\|\\\[\]]+)*(?:\[[0-9]+\])?(?==))/
199
127
 
200
128
  # Some CEF extension keys seen in the wild use an undocumented array-like syntax that may not be compatible with
201
129
  # the Event API's strict-mode FieldReference parser (e.g., `fieldname[0]`).
@@ -212,6 +140,30 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
212
140
  # Cache of a scanner pattern that _captures_ extension field key/value pairs
213
141
  EXTENSION_KEY_VALUE_SCANNER = /(#{EXTENSION_KEY_PATTERN})=(#{EXTENSION_VALUE_PATTERN})\s*/
214
142
 
143
+ ##
144
+ # @see CEF#sanitize_header_field
145
+ HEADER_FIELD_SANITIZER_MAPPING = {
146
+ "\\" => "\\\\",
147
+ "|" => "\\|",
148
+ "\n" => " ",
149
+ "\r" => " ",
150
+ }
151
+ HEADER_FIELD_SANITIZER_PATTERN = Regexp.union(HEADER_FIELD_SANITIZER_MAPPING.keys)
152
+ private_constant :HEADER_FIELD_SANITIZER_MAPPING, :HEADER_FIELD_SANITIZER_PATTERN
153
+
154
+ ##
155
+ # @see CEF#sanitize_extension_val
156
+ EXTENSION_VALUE_SANITIZER_MAPPING = {
157
+ "\\" => "\\\\",
158
+ "=" => "\\=",
159
+ "\n" => "\\n",
160
+ "\r" => "\\n",
161
+ }
162
+ EXTENSION_VALUE_SANITIZER_PATTERN = Regexp.union(EXTENSION_VALUE_SANITIZER_MAPPING.keys)
163
+ private_constant :EXTENSION_VALUE_SANITIZER_MAPPING, :EXTENSION_VALUE_SANITIZER_PATTERN
164
+
165
+ CEF_PREFIX = 'CEF:'.freeze
166
+
215
167
  public
216
168
  def initialize(params={})
217
169
  super(params)
@@ -228,6 +180,12 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
228
180
  @delimiter = @delimiter.gsub("\\r", "\r").gsub("\\n", "\n")
229
181
  @buffer = FileWatch::BufferedTokenizer.new(@delimiter)
230
182
  end
183
+
184
+ require_relative 'cef/timestamp_normalizer'
185
+ @timestamp_normalzer = TimestampNormalizer.new(locale: @locale, timezone: @default_timezone)
186
+
187
+ generate_header_fields!
188
+ generate_mappings!
231
189
  end
232
190
 
233
191
  public
@@ -242,6 +200,7 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
242
200
  end
243
201
 
244
202
  def handle(data, &block)
203
+ original_data = data.dup
245
204
  event = LogStash::Event.new
246
205
  event.set(raw_data_field, data) unless raw_data_field.nil?
247
206
 
@@ -258,7 +217,7 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
258
217
 
259
218
  # Use a scanning parser to capture the HEADER_FIELDS
260
219
  unprocessed_data = data
261
- HEADER_FIELDS.each do |field_name|
220
+ @header_fields.each do |field_name|
262
221
  match_data = HEADER_SCANNER.match(unprocessed_data)
263
222
  break if match_data.nil? # missing fields
264
223
 
@@ -276,22 +235,24 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
276
235
  message = unprocessed_data
277
236
 
278
237
  # Try and parse out the syslog header if there is one
279
- if event.get('cefVersion').include? ' '
280
- split_cef_version= event.get('cefVersion').rpartition(' ')
281
- event.set('syslog', split_cef_version[0])
282
- event.set('cefVersion',split_cef_version[2])
238
+ cef_version_field = @header_fields[0]
239
+ if (cef_version = event.get(cef_version_field)).include?(' ')
240
+ split_cef_version = cef_version.rpartition(' ')
241
+ event.set(@syslog_header, split_cef_version[0])
242
+ event.set(cef_version_field, split_cef_version[2])
283
243
  end
284
244
 
285
245
  # Get rid of the CEF bit in the version
286
- event.set('cefVersion', event.get('cefVersion').sub(/^CEF:/, ''))
246
+ event.set(cef_version_field, delete_cef_prefix(event.get(cef_version_field)))
287
247
 
288
248
  # Use a scanning parser to capture the Extension Key/Value Pairs
289
249
  if message && message.include?('=')
290
250
  message = message.strip
251
+ extension_fields = {}
291
252
 
292
253
  message.scan(EXTENSION_KEY_VALUE_SCANNER) do |extension_field_key, raw_extension_field_value|
293
254
  # expand abbreviated extension field keys
294
- extension_field_key = MAPPINGS.fetch(extension_field_key, extension_field_key)
255
+ extension_field_key = @decode_mapping.fetch(extension_field_key, extension_field_key)
295
256
 
296
257
  # convert extension field name to strict legal field_reference, fixing field names with ambiguous array-like syntax
297
258
  extension_field_key = extension_field_key.sub(EXTENSION_KEY_ARRAY_CAPTURE, '[\1]\2') if extension_field_key.end_with?(']')
@@ -299,13 +260,28 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
299
260
  # process legal extension field value escapes
300
261
  extension_field_value = raw_extension_field_value.gsub(EXTENSION_VALUE_ESCAPE_CAPTURE, '\1')
301
262
 
302
- event.set(extension_field_key, extension_field_value)
263
+ extension_fields[extension_field_key] = extension_field_value
264
+ end
265
+
266
+ # in ECS mode, normalize timestamps including timezone.
267
+ if ecs_compatibility != :disabled
268
+ device_timezone = extension_fields['[event][timezone]']
269
+ @timestamp_fields.each do |timestamp_field_name|
270
+ raw_timestamp = extension_fields.delete(timestamp_field_name) or next
271
+ value = normalize_timestamp(raw_timestamp, device_timezone)
272
+ event.set(timestamp_field_name, value)
273
+ end
274
+ end
275
+
276
+ extension_fields.each do |field_key, field_value|
277
+ event.set(field_key, field_value)
303
278
  end
304
279
  end
305
280
 
306
281
  yield event
307
282
  rescue => e
308
- @logger.error("Failed to decode CEF payload. Generating failure event with payload in message field.", :error => e.message, :backtrace => e.backtrace, :data => data)
283
+ @logger.error("Failed to decode CEF payload. Generating failure event with payload in message field.",
284
+ :exception => e.class, :message => e.message, :backtrace => e.backtrace, :original_data => original_data)
309
285
  yield LogStash::Event.new("message" => data, "tags" => ["_cefparsefailure"])
310
286
  end
311
287
 
@@ -314,79 +290,290 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
314
290
  # "CEF:0|Elasticsearch|Logstash|1.0|Signature|Name|Sev|"
315
291
 
316
292
  vendor = sanitize_header_field(event.sprintf(@vendor))
317
- vendor = self.class.get_config["vendor"][:default] if vendor == ""
293
+ vendor = self.class.get_config["vendor"][:default] if vendor.empty?
318
294
 
319
295
  product = sanitize_header_field(event.sprintf(@product))
320
- product = self.class.get_config["product"][:default] if product == ""
296
+ product = self.class.get_config["product"][:default] if product.empty?
321
297
 
322
298
  version = sanitize_header_field(event.sprintf(@version))
323
- version = self.class.get_config["version"][:default] if version == ""
299
+ version = self.class.get_config["version"][:default] if version.empty?
324
300
 
325
301
  signature = sanitize_header_field(event.sprintf(@signature))
326
- signature = self.class.get_config["signature"][:default] if signature == ""
302
+ signature = self.class.get_config["signature"][:default] if signature.empty?
327
303
 
328
304
  name = sanitize_header_field(event.sprintf(@name))
329
- name = self.class.get_config["name"][:default] if name == ""
305
+ name = self.class.get_config["name"][:default] if name.empty?
330
306
 
331
307
  severity = sanitize_severity(event, @severity)
332
308
 
333
309
  # Should also probably set the fields sent
334
310
  header = ["CEF:0", vendor, product, version, signature, name, severity].join("|")
335
- values = @fields.map {|fieldname| get_value(fieldname, event)}.compact.join(" ")
311
+ values = @fields.map { |fieldname| get_value(fieldname, event) }.compact.join(" ")
336
312
 
337
313
  @on_event.call(event, "#{header}|#{values}#{@delimiter}")
338
314
  end
339
315
 
340
316
  private
341
317
 
318
+ def generate_header_fields!
319
+ # @header_fields is an _ordered_ set of fields.
320
+ @header_fields = [
321
+ ecs_select[disabled: 'cefVersion', v1: '[cef][version]'],
322
+ ecs_select[disabled: 'deviceVendor', v1: '[observer][vendor]'],
323
+ ecs_select[disabled: 'deviceProduct', v1: '[observer][product]'],
324
+ ecs_select[disabled: 'deviceVersion', v1: '[observer][version]'],
325
+ ecs_select[disabled: 'deviceEventClassId', v1: '[event][code]'],
326
+ ecs_select[disabled: 'name', v1: '[cef][name]'],
327
+ ecs_select[disabled: 'severity', v1: '[event][severity]']
328
+ ].map(&:freeze).freeze
329
+ # the @syslog_header is the field name used when a syslog header preceeds the CEF Version.
330
+ @syslog_header = ecs_select[disabled:'syslog',v1:'[log][syslog][header]']
331
+ end
332
+
333
+ class CEFField
334
+ ##
335
+ # @param name [String]: the full CEF name of a field
336
+ # @param key [String] (optional): an abbreviated CEF key to use when encoding a value with `reverse_mapping => true`
337
+ # when left unspecified, the `key` is the field's `name`.
338
+ # @param ecs_field [String] (optional): an ECS-compatible field reference to use, with square-bracket syntax.
339
+ # when left unspecified, the `ecs_field` is the field's `name`.
340
+ # @param legacy [String] (optional): a legacy CEF name to support in pass-through.
341
+ # in decoding mode without ECS, field name will be used as-provided.
342
+ # in encoding mode without ECS when provided to `fields` and `reverse_mapping => false`,
343
+ # field name will be used as-provided.
344
+ # @param priority [Integer] (optional): when multiple fields resolve to the same ECS field name, the field with the
345
+ # highest `prioriry` will be used by the encoder.
346
+ def initialize(name, key: name, ecs_field: name, legacy:nil, priority:0, normalize:nil)
347
+ @name = name
348
+ @key = key
349
+ @ecs_field = ecs_field
350
+ @legacy = legacy
351
+ @priority = priority
352
+ @normalize = normalize
353
+ end
354
+ attr_reader :name
355
+ attr_reader :key
356
+ attr_reader :ecs_field
357
+ attr_reader :legacy
358
+ attr_reader :priority
359
+ attr_reader :normalize
360
+ end
361
+
362
+ def generate_mappings!
363
+ encode_mapping = Hash.new
364
+ decode_mapping = Hash.new
365
+ timestamp_fields = Set.new
366
+ [
367
+ CEFField.new("agentAddress", key: "agt", ecs_field: "[agent][ip]"),
368
+ CEFField.new("agentDnsDomain", ecs_field: "[cef][agent][registered_domain]", priority: 10),
369
+ CEFField.new("agentHostName", key: "ahost", ecs_field: "[agent][name]"),
370
+ CEFField.new("agentId", key: "aid", ecs_field: "[agent][id]"),
371
+ CEFField.new("agentMacAddress", key: "amac", ecs_field: "[agent][mac]"),
372
+ CEFField.new("agentNtDomain", ecs_field: "[cef][agent][registered_domain]"),
373
+ CEFField.new("agentReceiptTime", key: "art", ecs_field: "[event][created]", normalize: :timestamp),
374
+ CEFField.new("agentTimeZone", key: "atz", ecs_field: "[cef][agent][timezone]"),
375
+ CEFField.new("agentTranslatedAddress", ecs_field: "[cef][agent][nat][ip]"),
376
+ CEFField.new("agentTranslatedZoneExternalID", ecs_field: "[cef][agent][translated_zone][external_id]"),
377
+ CEFField.new("agentTranslatedZoneURI", ecs_field: "[cef][agent][translated_zone][uri]"),
378
+ CEFField.new("agentType", key: "at", ecs_field: "[agent][type]"),
379
+ CEFField.new("agentVersion", key: "av", ecs_field: "[agent][version]"),
380
+ CEFField.new("agentZoneExternalID", ecs_field: "[cef][agent][zone][external_id]"),
381
+ CEFField.new("agentZoneURI", ecs_field: "[cef][agent][zone][uri]"),
382
+ CEFField.new("applicationProtocol", key: "app", ecs_field: "[network][protocol]"),
383
+ CEFField.new("baseEventCount", key: "cnt", ecs_field: "[cef][base_event_count]"),
384
+ CEFField.new("bytesIn", key: "in", ecs_field: "[source][bytes]"),
385
+ CEFField.new("bytesOut", key: "out", ecs_field: "[destination][bytes]"),
386
+ CEFField.new("categoryDeviceType", key: "catdt", ecs_field: "[cef][device_type]"),
387
+ CEFField.new("customerExternalID", ecs_field: "[organization][id]"),
388
+ CEFField.new("customerURI", ecs_field: "[organization][name]"),
389
+ CEFField.new("destinationAddress", key: "dst", ecs_field: "[destination][ip]"),
390
+ CEFField.new("destinationDnsDomain", ecs_field: "[destination][registered_domain]", priority: 10),
391
+ CEFField.new("destinationGeoLatitude", key: "dlat", ecs_field: "[destination][geo][location][lat]", legacy: "destinationLatitude"),
392
+ CEFField.new("destinationGeoLongitude", key: "dlong", ecs_field: "[destination][geo][location][lon]", legacy: "destinationLongitude"),
393
+ CEFField.new("destinationHostName", key: "dhost", ecs_field: "[destination][domain]"),
394
+ CEFField.new("destinationMacAddress", key: "dmac", ecs_field: "[destination][mac]"),
395
+ CEFField.new("destinationNtDomain", key: "dntdom", ecs_field: "[destination][registered_domain]"),
396
+ CEFField.new("destinationPort", key: "dpt", ecs_field: "[destination][port]"),
397
+ CEFField.new("destinationProcessId", key: "dpid", ecs_field: "[destination][process][pid]"),
398
+ CEFField.new("destinationProcessName", key: "dproc", ecs_field: "[destination][process][name]"),
399
+ CEFField.new("destinationServiceName", ecs_field: "[destination][service][name]"),
400
+ CEFField.new("destinationTranslatedAddress", ecs_field: "[destination][nat][ip]"),
401
+ CEFField.new("destinationTranslatedPort", ecs_field: "[destination][nat][port]"),
402
+ CEFField.new("destinationTranslatedZoneExternalID", ecs_field: "[cef][destination][translated_zone][external_id]"),
403
+ CEFField.new("destinationTranslatedZoneURI", ecs_field: "[cef][destination][translated_zone][uri]"),
404
+ CEFField.new("destinationUserId", key: "duid", ecs_field: "[destination][user][id]"),
405
+ CEFField.new("destinationUserName", key: "duser", ecs_field: "[destination][user][name]"),
406
+ CEFField.new("destinationUserPrivileges", key: "dpriv", ecs_field: "[destination][user][group][name]"),
407
+ CEFField.new("destinationZoneExternalID", ecs_field: "[cef][destination][zone][external_id]"),
408
+ CEFField.new("destinationZoneURI", ecs_field: "[cef][destination][zone][uri]"),
409
+ CEFField.new("deviceAction", key: "act", ecs_field: "[event][action]"),
410
+ CEFField.new("deviceAddress", key: "dvc", ecs_field: "[#{@device}][ip]"),
411
+ CEFField.new("deviceCustomFloatingPoint1", key: "cfp1", ecs_field: "[cef][device_custom_floating_point_1][value]"),
412
+ CEFField.new("deviceCustomFloatingPoint1Label", key: "cfp1Label", ecs_field: "[cef][device_custom_floating_point_1][label]"),
413
+ CEFField.new("deviceCustomFloatingPoint2", key: "cfp2", ecs_field: "[cef][device_custom_floating_point_2][value]"),
414
+ CEFField.new("deviceCustomFloatingPoint2Label", key: "cfp2Label", ecs_field: "[cef][device_custom_floating_point_2][label]"),
415
+ CEFField.new("deviceCustomFloatingPoint3", key: "cfp3", ecs_field: "[cef][device_custom_floating_point_3][value]"),
416
+ CEFField.new("deviceCustomFloatingPoint3Label", key: "cfp3Label", ecs_field: "[cef][device_custom_floating_point_3][label]"),
417
+ CEFField.new("deviceCustomFloatingPoint4", key: "cfp4", ecs_field: "[cef][device_custom_floating_point_4][value]"),
418
+ CEFField.new("deviceCustomFloatingPoint4Label", key: "cfp4Label", ecs_field: "[cef][device_custom_floating_point_4][label]"),
419
+ CEFField.new("deviceCustomIPv6Address1", key: "c6a1", ecs_field: "[cef][device_custom_ipv6_address_1][value]"),
420
+ CEFField.new("deviceCustomIPv6Address1Label", key: "c6a1Label", ecs_field: "[cef][device_custom_ipv6_address_1][label]"),
421
+ CEFField.new("deviceCustomIPv6Address2", key: "c6a2", ecs_field: "[cef][device_custom_ipv6_address_2][value]"),
422
+ CEFField.new("deviceCustomIPv6Address2Label", key: "c6a2Label", ecs_field: "[cef][device_custom_ipv6_address_2][label]"),
423
+ CEFField.new("deviceCustomIPv6Address3", key: "c6a3", ecs_field: "[cef][device_custom_ipv6_address_3][value]"),
424
+ CEFField.new("deviceCustomIPv6Address3Label", key: "c6a3Label", ecs_field: "[cef][device_custom_ipv6_address_3][label]"),
425
+ CEFField.new("deviceCustomIPv6Address4", key: "c6a4", ecs_field: "[cef][device_custom_ipv6_address_4][value]"),
426
+ CEFField.new("deviceCustomIPv6Address4Label", key: "c6a4Label", ecs_field: "[cef][device_custom_ipv6_address_4][label]"),
427
+ CEFField.new("deviceCustomNumber1", key: "cn1", ecs_field: "[cef][device_custom_number_1][value]"),
428
+ CEFField.new("deviceCustomNumber1Label", key: "cn1Label", ecs_field: "[cef][device_custom_number_1][label]"),
429
+ CEFField.new("deviceCustomNumber2", key: "cn2", ecs_field: "[cef][device_custom_number_2][value]"),
430
+ CEFField.new("deviceCustomNumber2Label", key: "cn2Label", ecs_field: "[cef][device_custom_number_2][label]"),
431
+ CEFField.new("deviceCustomNumber3", key: "cn3", ecs_field: "[cef][device_custom_number_3][value]"),
432
+ CEFField.new("deviceCustomNumber3Label", key: "cn3Label", ecs_field: "[cef][device_custom_number_3][label]"),
433
+ CEFField.new("deviceCustomString1", key: "cs1", ecs_field: "[cef][device_custom_string_1][value]"),
434
+ CEFField.new("deviceCustomString1Label", key: "cs1Label", ecs_field: "[cef][device_custom_string_1][label]"),
435
+ CEFField.new("deviceCustomString2", key: "cs2", ecs_field: "[cef][device_custom_string_2][value]"),
436
+ CEFField.new("deviceCustomString2Label", key: "cs2Label", ecs_field: "[cef][device_custom_string_2][label]"),
437
+ CEFField.new("deviceCustomString3", key: "cs3", ecs_field: "[cef][device_custom_string_3][value]"),
438
+ CEFField.new("deviceCustomString3Label", key: "cs3Label", ecs_field: "[cef][device_custom_string_3][label]"),
439
+ CEFField.new("deviceCustomString4", key: "cs4", ecs_field: "[cef][device_custom_string_4][value]"),
440
+ CEFField.new("deviceCustomString4Label", key: "cs4Label", ecs_field: "[cef][device_custom_string_4][label]"),
441
+ CEFField.new("deviceCustomString5", key: "cs5", ecs_field: "[cef][device_custom_string_5][value]"),
442
+ CEFField.new("deviceCustomString5Label", key: "cs5Label", ecs_field: "[cef][device_custom_string_5][label]"),
443
+ CEFField.new("deviceCustomString6", key: "cs6", ecs_field: "[cef][device_custom_string_6][value]"),
444
+ CEFField.new("deviceCustomString6Label", key: "cs6Label", ecs_field: "[cef][device_custom_string_6][label]"),
445
+ CEFField.new("deviceDirection", ecs_field: "[network][direction]"),
446
+ CEFField.new("deviceDnsDomain", ecs_field: "[#{@device}][registered_domain]", priority: 10),
447
+ CEFField.new("deviceEventCategory", key: "cat", ecs_field: "[cef][category]"),
448
+ CEFField.new("deviceExternalId", ecs_field: (@device == 'host' ? "[host][id]" : "[observer][name]")),
449
+ CEFField.new("deviceFacility", ecs_field: "[log][syslog][facility][code]"),
450
+ CEFField.new("deviceHostName", key: "dvchost", ecs_field: (@device == 'host' ? '[host][name]' : '[observer][hostname]')),
451
+ CEFField.new("deviceInboundInterface", ecs_field: "[observer][ingress][interface][name]"),
452
+ CEFField.new("deviceMacAddress", key: "dvcmac", ecs_field: "[@device][mac]"),
453
+ CEFField.new("deviceNtDomain", ecs_field: "[cef][nt_domain]"),
454
+ CEFField.new("deviceOutboundInterface", ecs_field: "[observer][egress][interface][name]"),
455
+ CEFField.new("devicePayloadId", ecs_field: "[cef][payload_id]"),
456
+ CEFField.new("deviceProcessId", key: "dvcpid", ecs_field: "[process][pid]"),
457
+ CEFField.new("deviceProcessName", ecs_field: "[process][name]"),
458
+ CEFField.new("deviceReceiptTime", key: "rt", ecs_field: "@timestamp", normalize: :timestamp),
459
+ CEFField.new("deviceTimeZone", key: "dtz", ecs_field: "[event][timezone]", legacy: "destinationTimeZone"),
460
+ CEFField.new("deviceTranslatedAddress", ecs_field: "[host][nat][ip]"),
461
+ CEFField.new("deviceTranslatedZoneExternalID", ecs_field: "[cef][translated_zone][external_id]"),
462
+ CEFField.new("deviceTranslatedZoneURI", ecs_field: "[cef][translated_zone][uri]"),
463
+ CEFField.new("deviceVersion", ecs_field: "[observer][version]"),
464
+ CEFField.new("deviceZoneExternalID", ecs_field: "[cef][zone][external_id]"),
465
+ CEFField.new("deviceZoneURI", ecs_field: "[cef][zone][uri]"),
466
+ CEFField.new("endTime", key: "end", ecs_field: "[event][end]", normalize: :timestamp),
467
+ CEFField.new("eventId", ecs_field: "[event][id]"),
468
+ CEFField.new("eventOutcome", key: "outcome", ecs_field: "[event][outcome]"),
469
+ CEFField.new("externalId", ecs_field: "[cef][external_id]"),
470
+ CEFField.new("fileCreateTime", ecs_field: "[file][created]"),
471
+ CEFField.new("fileHash", ecs_field: "[file][hash]]"),
472
+ CEFField.new("fileId", ecs_field: "[file][inode]"),
473
+ CEFField.new("fileModificationTime", ecs_field: "[file][mtime]", normalize: :timestamp),
474
+ CEFField.new("fileName", key: "fname", ecs_field: "[file][name]"),
475
+ CEFField.new("filePath", ecs_field: "[file][path]"),
476
+ CEFField.new("filePermission", ecs_field: "[file][group]"),
477
+ CEFField.new("fileSize", key: "fsize", ecs_field: "[file][size]"),
478
+ CEFField.new("fileType", ecs_field: "[file][extension]"),
479
+ CEFField.new("managerReceiptTime", key: "mrt", ecs_field: "[event][ingested]", normalize: :timestamp),
480
+ CEFField.new("message", key: "msg", ecs_field: "[message]"),
481
+ CEFField.new("oldFileCreateTime", ecs_field: "[cef][old_file][created]", normalize: :timestamp),
482
+ CEFField.new("oldFileHash", ecs_field: "[cef][old_file][hash]"),
483
+ CEFField.new("oldFileId", ecs_field: "[cef][old_file][inode]"),
484
+ CEFField.new("oldFileModificationTime", ecs_field: "[cef][old_file][mtime]", normalize: :timestamp),
485
+ CEFField.new("oldFileName", ecs_field: "[cef][old_file][name]"),
486
+ CEFField.new("oldFilePath", ecs_field: "[cef][old_file][path]"),
487
+ CEFField.new("oldFilePermission", ecs_field: "[cef][old_file][group]"),
488
+ CEFField.new("oldFileSize", ecs_field: "[cef][old_file][size]"),
489
+ CEFField.new("oldFileType", ecs_field: "[cef][old_file][extension]"),
490
+ CEFField.new("rawEvent", ecs_field: "[event][original]"),
491
+ CEFField.new("Reason", key: "reason", ecs_field: "[event][reason]"),
492
+ CEFField.new("requestClientApplication", ecs_field: "[user_agent][original]"),
493
+ CEFField.new("requestContext", ecs_field: "[http][request][referrer]"),
494
+ CEFField.new("requestCookies", ecs_field: "[cef][request][cookies]"),
495
+ CEFField.new("requestMethod", ecs_field: "[http][request][method]"),
496
+ CEFField.new("requestUrl", key: "request", ecs_field: "[url][original]"),
497
+ CEFField.new("sourceAddress", key: "src", ecs_field: "[source][ip]"),
498
+ CEFField.new("sourceDnsDomain", ecs_field: "[source][registered_domain]", priority: 10),
499
+ CEFField.new("sourceGeoLatitude", key: "slat", ecs_field: "[source][geo][location][lat]", legacy: "sourceLatitude"),
500
+ CEFField.new("sourceGeoLongitude", key: "slong", ecs_field: "[source][geo][location][lon]", legacy: "sourceLongitude"),
501
+ CEFField.new("sourceHostName", key: "shost", ecs_field: "[source][domain]"),
502
+ CEFField.new("sourceMacAddress", key: "smac", ecs_field: "[source][mac]"),
503
+ CEFField.new("sourceNtDomain", key: "sntdom", ecs_field: "[source][registered_domain]"),
504
+ CEFField.new("sourcePort", key: "spt", ecs_field: "[source][port]"),
505
+ CEFField.new("sourceProcessId", key: "spid", ecs_field: "[source][process][pid]"),
506
+ CEFField.new("sourceProcessName", key: "sproc", ecs_field: "[source][process][name]"),
507
+ CEFField.new("sourceServiceName", ecs_field: "[source][service][name]"),
508
+ CEFField.new("sourceTranslatedAddress", ecs_field: "[source][nat][ip]"),
509
+ CEFField.new("sourceTranslatedPort", ecs_field: "[source][nat][port]"),
510
+ CEFField.new("sourceTranslatedZoneExternalID", ecs_field: "[cef][source][translated_zone][external_id]"),
511
+ CEFField.new("sourceTranslatedZoneURI", ecs_field: "[cef][source][translated_zone][uri]"),
512
+ CEFField.new("sourceUserId", key: "suid", ecs_field: "[source][user][id]"),
513
+ CEFField.new("sourceUserName", key: "suser", ecs_field: "[source][user][name]"),
514
+ CEFField.new("sourceUserPrivileges", key: "spriv", ecs_field: "[source][user][group][name]"),
515
+ CEFField.new("sourceZoneExternalID", ecs_field: "[cef][source][zone][external_id]"),
516
+ CEFField.new("sourceZoneURI", ecs_field: "[cef][source][zone][uri]"),
517
+ CEFField.new("startTime", key: "start", ecs_field: "[event][start]", normalize: :timestamp),
518
+ CEFField.new("transportProtocol", key: "proto", ecs_field: "[network][transport]"),
519
+ CEFField.new("type", ecs_field: "[cef][type]"),
520
+ ].sort_by(&:priority).each do |cef|
521
+ field_name = ecs_select[disabled:cef.name, v1:cef.ecs_field]
522
+
523
+ # whether the source is a cef_key or cef_name, normalize to field_name
524
+ decode_mapping[cef.key] = field_name
525
+ decode_mapping[cef.name] = field_name
526
+
527
+ # whether source is a cef_name or a field_name, normalize to target
528
+ normalized_encode_target = @reverse_mapping ? cef.key : cef.name
529
+ encode_mapping[field_name] = normalized_encode_target
530
+ encode_mapping[cef.name] = normalized_encode_target unless cef.name == field_name
531
+
532
+ # if a field has an alias, normalize pass-through
533
+ if cef.legacy
534
+ decode_mapping[cef.legacy] = ecs_select[disabled:cef.legacy, v1:cef.ecs_field]
535
+ encode_mapping[cef.legacy] = @reverse_mapping ? cef.key : cef.legacy
536
+ end
537
+
538
+ timestamp_fields << field_name if ecs_compatibility != :disabled && cef.normalize == :timestamp
539
+ end
540
+
541
+ @decode_mapping = decode_mapping.dup.freeze
542
+ @encode_mapping = encode_mapping.dup.freeze
543
+ @timestamp_fields = timestamp_fields.dup.freeze
544
+ end
545
+
342
546
  # Escape pipes and backslashes in the header. Equal signs are ok.
343
547
  # Newlines are forbidden.
344
548
  def sanitize_header_field(value)
345
- output = ""
346
-
347
- value = value.to_s.gsub(/\r\n/, "\n")
348
-
349
- value.each_char{|c|
350
- case c
351
- when "\\", "|"
352
- output += "\\" + c
353
- when "\n", "\r"
354
- output += " "
355
- else
356
- output += c
357
- end
358
- }
359
-
360
- return output
549
+ value.to_s
550
+ .gsub("\r\n", "\n")
551
+ .gsub(HEADER_FIELD_SANITIZER_PATTERN, HEADER_FIELD_SANITIZER_MAPPING)
361
552
  end
362
553
 
363
554
  # Keys must be made up of a single word, with no spaces
364
555
  # must be alphanumeric
365
556
  def sanitize_extension_key(value)
366
- value = value.to_s.gsub(/[^a-zA-Z0-9]/, "")
367
- return value
557
+ value.to_s
558
+ .gsub(/[^a-zA-Z0-9]/, "")
368
559
  end
369
560
 
370
561
  # Escape equal signs in the extensions. Canonicalize newlines.
371
562
  # CEF spec leaves it up to us to choose \r or \n for newline.
372
563
  # We choose \n as the default.
373
564
  def sanitize_extension_val(value)
374
- output = ""
375
-
376
- value = value.to_s.gsub(/\r\n/, "\n")
377
-
378
- value.each_char{|c|
379
- case c
380
- when "\\", "="
381
- output += "\\" + c
382
- when "\n", "\r"
383
- output += "\\n"
384
- else
385
- output += c
386
- end
387
- }
565
+ value.to_s
566
+ .gsub("\r\n", "\n")
567
+ .gsub(EXTENSION_VALUE_SANITIZER_PATTERN, EXTENSION_VALUE_SANITIZER_MAPPING)
568
+ end
569
+
570
+ def normalize_timestamp(value, device_timezone_name)
571
+ value = @timestamp_normalzer.normalize(value, device_timezone_name).iso8601(9)
388
572
 
389
- return output
573
+ LogStash::Timestamp.new(value)
574
+ rescue => e
575
+ @logger.error("Failed to parse CEF timestamp value `#{value}` (#{e.message})")
576
+ raise InvalidTimestamp.new("Not a valid CEF timestamp: `#{value}`")
390
577
  end
391
578
 
392
579
  def get_value(fieldname, event)
@@ -394,12 +581,9 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
394
581
 
395
582
  return nil if val.nil?
396
583
 
397
- key = sanitize_extension_key(fieldname)
398
-
399
- if @reverse_mapping
400
- key = REVERSE_MAPPINGS[key] || key
401
- end
402
-
584
+ key = @encode_mapping.fetch(fieldname, fieldname)
585
+ key = sanitize_extension_key(key)
586
+
403
587
  case val
404
588
  when Array, Hash
405
589
  return "#{key}=#{sanitize_extension_val(val.to_json)}"
@@ -413,7 +597,7 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
413
597
  def sanitize_severity(event, severity)
414
598
  severity = sanitize_header_field(event.sprintf(severity)).strip
415
599
  severity = self.class.get_config["severity"][:default] unless valid_severity?(severity)
416
- severity = severity.to_i.to_s
600
+ severity.to_i.to_s
417
601
  end
418
602
 
419
603
  def valid_severity?(sev)
@@ -424,4 +608,14 @@ class LogStash::Codecs::CEF < LogStash::Codecs::Base
424
608
  rescue TypeError, ArgumentError
425
609
  false
426
610
  end
611
+
612
+ if Gem::Requirement.new(">= 2.5.0").satisfied_by? Gem::Version.new(RUBY_VERSION)
613
+ def delete_cef_prefix(cef_version)
614
+ cef_version.delete_prefix(CEF_PREFIX)
615
+ end
616
+ else
617
+ def delete_cef_prefix(cef_version)
618
+ cef_version.start_with?(CEF_PREFIX) ? cef_version[CEF_PREFIX.length..-1] : cef_version
619
+ end
620
+ end
427
621
  end