fluent-plugin-parser_cefalt 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.3.0
4
+ script: bundle exec rspec
5
+ gemfile:
6
+ - Gemfile
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
3
+ gem 'coveralls', require: false
4
+ gem 'simplecov'
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2016 Tomoyuki Sugimura
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,90 @@
1
+ # fluent-plugin-parser_cefalt
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/fluent-plugin-parser_cefalt.svg)](https://badge.fury.io/rb/fluent-plugin-parser_cefalt)
4
+ [![downloads](https://img.shields.io/gem/dt/fluent-plugin-parser_cefalt.svg)](https://rubygems.org/gems/fluent-plugin-parser_cefalt)
5
+ [![MIT License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](LICENSE)
6
+
7
+ Fluentd Parser plugin to parse CEF - common event format - Alternative
8
+
9
+ ## Requirements
10
+
11
+ | fluent-plugin-parser_cefalt | fluentd | ruby |
12
+ |---------------------------|---------|------|
13
+ | >= 1.0.0 | >= v0.14.0 | >= 2.1 |
14
+ | < 1.0.0 | >= v0.12.0 | >= 1.9 |
15
+
16
+ ## Installation
17
+
18
+ Install the plugin with fluent-gem:
19
+
20
+ ```bash
21
+
22
+ # for fluentd v0.14 or higher
23
+
24
+ fluent-gem install fluent-plugin-parser_cefalt
25
+
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ ```
31
+ <source>
32
+ @type syslog
33
+ port 5514
34
+ bind 0.0.0.0
35
+ <transport tcp>
36
+ </transport>
37
+ <parse>
38
+ @type cefalt
39
+ log_format syslog
40
+ parse_strict_mode true
41
+ output_raw_field false
42
+ </parse>
43
+ tag ceflog
44
+ </source>
45
+ <match ceflog.**>
46
+ @type stdout
47
+ </match>
48
+ ```
49
+
50
+ ## parameters
51
+
52
+ * `log_format` (default: syslog)
53
+
54
+ input log format, currently only 'syslog' is valid
55
+
56
+ * `log_utc_offset` (default: nil)
57
+
58
+ UTC offset (e.g. '+09:00') to apply when a record carries no timezone information and was not produced in the local timezone
59
+
60
+ if log_utc_offset is nil or an invalid value, the system timezone is used
61
+
62
+ if a log has timezone information, log_utc_offset is ignored
63
+
64
+ * `syslog_timestamp_format` (default: '\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}')
65
+
66
+ * `syslog_timestamp_format_5424` (default: '\d{4}[-]\d{2}[-]\d{2}[T]\d{2}[:]\d{2}[:]\d{2}(?:\.\d{1,6})?(?:[+-]\d{2}[:]\d{2}|Z)')
67
+
68
+ * `cef_version` (default: 0)
69
+
70
+ CEF version, this should be 0
71
+
72
+ * `parse_strict_mode` (default: true)
73
+
74
+ if the CEF extensions are the following, the value of the key cs2 should be 'foo hoge=fuga'
75
+
76
+ - cs1=test cs2=foo hoge=fuga cs3=bar
77
+
78
+ if parse_strict_mode is false, parsing is rough: the value of the key cs2 becomes 'foo', and the non-CEF key 'hoge' appears with the value 'fuga'
79
+
80
+ * `cef_keyfilename` (default: 'config/cef_version_0_keys.yaml')
81
+
82
+ used when parse_strict_mode is true; path to a YAML file listing the valid CEF keys (a relative path is resolved from the plugin's own directory)
83
+
84
+ * `output_raw_field` (default: false)
85
+
86
+ append {"raw":\<message itself\>} key-value even when parsing succeeds
87
+
88
+ ## License
89
+
90
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+ task :default => :spec
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.6
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
# Make the gem's lib/ directory requirable while the specification is evaluated.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name          = "fluent-plugin-parser_cefalt"
  # Resolve VERSION next to this gemspec instead of the current working
  # directory, so the specification also loads when evaluated from elsewhere.
  spec.version       = File.read(File.expand_path('../VERSION', __FILE__)).strip
  spec.authors       = ["Tomoyuki Sugimura"]
  spec.email         = ["tomoyuki.sugimura@gmail.com"]
  spec.description   = %q{common event format(CEF) parser plugin for fluentd}
  spec.summary       = %q{common event format(CEF) parser plugin, currently only 'syslog' format is permitted}
  spec.homepage      = "https://github.com/lunardial/fluent-plugin-parser_cef"
  spec.license       = "MIT"

  # Package exactly what git tracks; executables and test files are derived
  # from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.required_ruby_version = ">= 2.1"

  spec.add_runtime_dependency "fluentd", ">= 0.14.0", "< 2"

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "rspec-core"
  spec.add_development_dependency "test-unit"
end
@@ -0,0 +1,221 @@
1
+ # Implementing ArcSight Common Event Format
2
+ # Chapter2: ArcSight Extension Dictionary
3
+
4
+ # CEF Key Names For Event Producers
5
+ cef_key_names_for_event_producers:
6
+ - act
7
+ - app
8
+ - c6a1
9
+ - c6a1Label
10
+ - c6a2
11
+ - c6a2Label
12
+ - c6a3
13
+ - c6a3Label
14
+ - c6a4
15
+ - c6a4Label
16
+ - cfp1
17
+ - cfp1Label
18
+ - cfp2
19
+ - cfp2Label
20
+ - cfp3
21
+ - cfp3Label
22
+ - cfp4
23
+ - cfp4Label
24
+ - cn1
25
+ - cn1Label
26
+ - cn2
27
+ - cn2Label
28
+ - cn3
29
+ - cn3Label
30
+ - cnt
31
+ - cs1
32
+ - cs1Label
33
+ - cs2
34
+ - cs2Label
35
+ - cs3
36
+ - cs3Label
37
+ - cs4
38
+ - cs4Label
39
+ - cs5
40
+ - cs5Label
41
+ - cs6
42
+ - cs6Label
43
+ - destinationDnsDomain
44
+ - destinationServiceName
45
+ - destinationTranslatedAddress
46
+ - destinationTranslatedPort
47
+ - deviceCustomDate1
48
+ - deviceCustomDate1Label
49
+ - deviceCustomDate2
50
+ - deviceCustomDate2Label
51
+ - deviceDirection
52
+ - deviceDnsDomain
53
+ - deviceExternalId
54
+ - deviceFacility
55
+ - deviceInboundInterface
56
+ - deviceNtDomain
57
+ - deviceOutboundInterface
58
+ - devicePayloadId
59
+ - deviceProcessName
60
+ - deviceTranslatedAddress
61
+ - dhost
62
+ - dmac
63
+ - dntdom
64
+ - dpid
65
+ - dpriv
66
+ - dproc
67
+ - dpt
68
+ - dst
69
+ - dtz
70
+ - duid
71
+ - duser
72
+ - dvc
73
+ - dvchost
74
+ - dvcmac
75
+ - dvcpid
76
+ - end
77
+ - externalId
78
+ - fileCreateTime
79
+ - fileHash
80
+ - fileId
81
+ - fileModificationTime
82
+ - filePath
83
+ - filePermission
84
+ - fileType
85
+ - flexDate1
86
+ - flexDate1Label
87
+ - flexNumber1
88
+ - flexNumber1Label
89
+ - flexNumber2
90
+ - flexNumber2Label
91
+ - flexString1
92
+ - flexString1Label
93
+ - flexString2
94
+ - flexString2Label
95
+ - fname
96
+ - fsize
97
+ - in
98
+ - msg
99
+ - oldFileCreateTime
100
+ - oldFileHash
101
+ - oldFileId
102
+ - oldFileModificationTime
103
+ - oldFileName
104
+ - oldFilePath
105
+ - oldFilePermission
106
+ - oldFileSize
107
+ - oldFileType
108
+ - out
109
+ - outcome
110
+ - proto
111
+ - reason
112
+ - request
113
+ - requestClientApplication
114
+ - requestContext
115
+ - requestCookies
116
+ - requestMethod
117
+ - rt
118
+ - shost
119
+ - smac
120
+ - sntdom
121
+ - sourceDnsDomain
122
+ - sourceServiceName
123
+ - sourceTranslatedAddress
124
+ - sourceTranslatedPort
125
+ - spid
126
+ - spriv
127
+ - sproc
128
+ - spt
129
+ - src
130
+ - start
131
+ - suid
132
+ - suser
133
+ - type
134
+ - slocation
135
+ - dlocation
136
+ - PanOSPacketsReceived
137
+ - PanOSPacketsSent
138
+ - PanOSThreatCategory
139
+ - PanOSThreatID
140
+ - PanOSThreatName
141
+ - PanOSThreatDomain
142
+ - PanOSSeverity
143
+ - PanOSReferer
144
+ - PanOSURLCatList
145
+ - PanOSLogTimeStamp
146
+ - PanOSVirtualSystem
147
+ - PanOSEventID
148
+ - PanOSStage
149
+ - PanOSAuthMethod
150
+ - PanOSTunnelType
151
+ - PanOSSourceUserName
152
+ - PanOSSourceRegion
153
+ - PanOSEndpointDeviceName
154
+ - PanOSPublicIPv4
155
+ - PanOSPublicIPv6
156
+ - PanOSPrivateIPv4
157
+ - PanOSPrivateIPv6
158
+ - PanOSHostID
159
+ - PanOSDeviceSN
160
+ - PanOSGlobalProtectClientVersion
161
+ - PanOSEndpointOSType
162
+ - PanOSEndpointOSVersion
163
+ - PanOSCountOfRepeats
164
+ - PanOSQuarantineReason
165
+ - PanOSConnectionError
166
+ - PanOSDescription
167
+ - PanOSEventStatus
168
+ - PanOSGPGatewayLocation
169
+ - PanOSLoginDuration
170
+ - PanOSConnectionMethod
171
+ - PanOSConnectionErrorID
172
+ - PanOSPortal
173
+ - PanOSSequenceNo
174
+ - PanOSActionFlags
175
+ - PanOSTimeGeneratedHighResolution
176
+ - PanOSGatewaySelectionType
177
+ - PanOSSSLResponseTime
178
+ - PanOSGatewayPriority
179
+ - PanOSAttemptedGateways
180
+ - PanOSGateway
181
+ - PanOSEndpointDeviceSN
182
+
183
+ # CEF Key Names For Event Consumers
184
+ cef_key_names_for_event_consumers:
185
+ - agentDnsDomain
186
+ - agentNtDomain
187
+ - agentTranslatedAddress
188
+ - agentTranslatedZoneExternalID
189
+ - agentTranslatedZoneURI
190
+ - agentZoneExternalID
191
+ - agentZoneURI
192
+ - agt
193
+ - ahost
194
+ - aid
195
+ - amac
196
+ - art
197
+ - at
198
+ - atz
199
+ - av
200
+ - cat
201
+ - customerExternalID
202
+ - customerURI
203
+ - destinationTranslatedZoneExternalID
204
+ - destinationTranslatedZoneURI
205
+ - destinationZoneExternalID
206
+ - destinationZoneURI
207
+ - deviceTranslatedZoneExternalID
208
+ - deviceTranslatedZoneURI
209
+ - deviceZoneExternalID
210
+ - deviceZoneURI
211
+ - dlat
212
+ - dlong
213
+ - eventId
214
+ - rawEvent
215
+ - slat
216
+ - slong
217
+ - sourceTranslatedZoneExternalID
218
+ - sourceTranslatedZoneURI
219
+ - sourceZoneExternalID
220
+ - sourceZoneURI
221
+
@@ -0,0 +1,222 @@
1
+ # -*- coding: utf-8
2
+
3
+ require 'fluent/log'
4
+ require 'fluent/plugin/parser'
5
+ require 'time'
6
+ require 'yaml'
7
+
8
+ module Fluent
9
+ module Plugin
10
+ class CommonEventFormatParser < Parser
11
+ Fluent::Plugin.register_parser("cefalt", self)
12
+
13
+ REGEXP_DETECT_RFC5424 = /^[1-9]\d{0,2}/
14
+
15
+ config_param :log_format, :string, :default => "syslog"
16
+ config_param :log_utc_offset, :string, :default => nil
17
+ config_param :syslog_timestamp_format, :string, :default => '\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}'
18
+ config_param :syslog_timestamp_format_5424, :string, :default => '\d{4}[-]\d{2}[-]\d{2}[T]\d{2}[:]\d{2}[:]\d{2}(?:\.\d{1,6})?(?:[+-]\d{2}[:]\d{2}|Z)'
19
+ config_param :cef_version, :integer, :default => 0
20
+ config_param :parse_strict_mode, :bool, :default => true
21
+ config_param :cef_keyfilename, :string, :default => 'config/cef_version_0_keys.yaml'
22
+ config_param :output_raw_field, :bool, :default => false
23
+
24
# Builds the regular expressions and lookup data used by #parse.
#
# The syslog/CEF regexps and the UTC offset are always prepared. When
# parse_strict_mode is enabled the list of valid CEF keys is loaded from the
# YAML file named by cef_keyfilename (a path starting with "/" is used as is,
# anything else is looked up relative to this file's directory). Any error
# while loading the key list switches strict mode off and is logged as a
# warning rather than raised.
def configure(conf)
  super
  @key_value_format_regexp = /([^\s=]+)=(.*?)(?:(?=[^\s=]+=)|\z)/
  @valid_format_regexp = create_valid_format_regexp
  @valid_format_regexp_5424 = create_valid_format_regexp_5424
  @utc_offset = get_utc_offset(@log_utc_offset)
  begin
    $log.trace(@valid_format_regexp)
    $log.trace(@valid_format_regexp_5424)
    if @parse_strict_mode
      keyfile_path =
        if @cef_keyfilename =~ %r{^/}
          @cef_keyfilename
        else
          "#{File.dirname(File.expand_path(__FILE__))}/#{@cef_keyfilename}"
        end
      key_sections = YAML.load_file(keyfile_path)
      # Flatten every section of the YAML document into one list of key names.
      @keys_array = []
      key_sections.each { |_section, names| @keys_array.concat(names) }
      $log.info "running with strict mode, #{@keys_array.length} keys are valid."
    else
      $log.info "running without strict mode"
    end
  rescue => e
    # Degrade to non-strict parsing instead of failing configuration.
    @parse_strict_mode = false
    $log.warn "running without strict mode because of the following error"
    $log.warn "#{e.message}"
  end
end
51
+
52
# Parses one syslog-framed CEF message and yields the outcome to the block.
#
# Yields:
# * (nil, nil) when +text+ is nil or empty;
# * (Engine.now, {"raw" => scrubbed text}) when the text matches neither the
#   RFC3164-style nor the RFC5424-style layout;
# * (time, record) otherwise, where record holds the named header captures
#   merged with the parsed CEF extension, plus "raw" when output_raw_field
#   is enabled.
#
# The input string itself is never modified.
def parse(text)
  if text.nil? || text.empty?
    yield nil, nil
    return
  end
  log.trace(text)
  # Scrub a private copy: calling force_encoding on the caller's string would
  # change its encoding in place and raise on frozen input.
  replaced_text = text.dup.force_encoding("utf-8")
  replaced_text.scrub!('?')
  # Detect the layout on the scrubbed copy as well, so every regexp in this
  # method sees valid UTF-8 and invalid bytes still reach the "raw" fallback.
  if REGEXP_DETECT_RFC5424.match(replaced_text)
    record_overview = @valid_format_regexp_5424.match(replaced_text)
    log.trace "match 5424"
  else
    record_overview = @valid_format_regexp.match(replaced_text)
    log.trace "match 3164"
  end
  if record_overview.nil?
    yield Engine.now, { "raw" => replaced_text }
    return
  end
  time = get_unixtime_with_utc_offset(record_overview["syslog_timestamp"], @utc_offset)
  record = {}
  begin
    record_overview.names.each { |key| record[key] = record_overview[key] }
    # The extension text is expanded into individual keys below, so it is not
    # kept as a field of its own.
    text_cef_extension = record.delete("cef_extension")
  rescue StandardError
    yield Engine.now, { "raw" => replaced_text }
    return
  end
  record.merge!(parse_cef_extension(text_cef_extension)) unless text_cef_extension.nil?
  record["raw"] = replaced_text if @output_raw_field
  yield time, record
  nil
end
89
+
90
+ private
91
+
92
# Normalises the configured UTC offset to the "+HH:MM" form.
#
# +text+ is handed to Time#localtime; when that raises (for example on an
# unparsable offset) the offset of the system's local time is returned
# instead and the reason is logged. The chosen offset is logged either way.
def get_utc_offset(text)
  resolved = Time.new.localtime(text).strftime("%:z")
  $log.info "utc_offset: #{resolved}"
  resolved
rescue => e
  fallback = Time.new.localtime.strftime("%:z")
  $log.info "#{e.message}, use localtime"
  $log.info "utc_offset: #{fallback}"
  fallback
end
104
+
105
# Builds the regexp that recognises a timestamp/hostname/tag syslog header
# followed by a CEF header and extension.
#
# The timestamp pattern comes from syslog_timestamp_format and the accepted
# CEF version from cef_version. An optional U+FEFF (BOM) is tolerated right
# before "CEF:". Raises Fluent::ConfigError for any log_format other than
# "syslog".
def create_valid_format_regexp()
  unless @log_format == "syslog"
    raise Fluent::ConfigError, "#{@log_format} is unknown format"
  end

  header_syslog = /
    (?<syslog_timestamp>#{@syslog_timestamp_format})\s
    (?<syslog_hostname>\S+)\s
    (?<syslog_tag>\S*)\s*
  /x
  # Seven pipe-separated header fields; the extension follows the last pipe.
  header_cef = /
    CEF:(?<cef_version>#{@cef_version})\|
    (?<cef_device_vendor>[^|]*)\|
    (?<cef_device_product>[^|]*)\|
    (?<cef_device_version>[^|]*)\|
    (?<cef_device_event_class_id>[^|]*)\|
    (?<cef_name>[^|]*)\|
    (?<cef_severity>[^|]*)
  /x
  whole_line = /
    \A
      #{header_syslog}
      (?:\u{feff})?
      #{header_cef}\|
      (?<cef_extension>.*)
    \z
  /x
  Regexp.new(whole_line)
end
135
+
136
# Builds the regexp that recognises an RFC5424-style syslog header
# (single-digit version, timestamp, hostname, tag, pid, msgid and either "-"
# or one or more bracketed structured-data elements) followed by a CEF
# header and extension.
#
# The timestamp pattern comes from syslog_timestamp_format_5424 and the
# accepted CEF version from cef_version. Raises Fluent::ConfigError for any
# log_format other than "syslog".
def create_valid_format_regexp_5424()
  unless @log_format == "syslog"
    raise Fluent::ConfigError, "#{@log_format} is unknown format"
  end

  header_syslog = /
    (?:[1-9])\s
    (?<syslog_timestamp>#{@syslog_timestamp_format_5424})\s
    (?<syslog_hostname>\S+)\s
    (?<syslog_tag>\S+)\s
    (?<pid>\S+)\s
    (?<msgid>\S+)\s
    (?<extradata>(?:\-|(?:\[.*?(?<!\\)\])+))\s
  /x
  # Seven pipe-separated header fields; the extension follows the last pipe.
  header_cef = /
    CEF:(?<cef_version>#{@cef_version})\|
    (?<cef_device_vendor>[^|]*)\|
    (?<cef_device_product>[^|]*)\|
    (?<cef_device_version>[^|]*)\|
    (?<cef_device_event_class_id>[^|]*)\|
    (?<cef_name>[^|]*)\|
    (?<cef_severity>[^|]*)
  /x
  whole_line = /
    \A
      #{header_syslog}
      #{header_cef}\|
      (?<cef_extension>.*)
    \z
  /x
  Regexp.new(whole_line)
end
169
+
170
# Converts a syslog timestamp string to integer Unix time.
#
# When the timestamp already ends in zone information -- a numeric offset
# ("+09:00" / "+0900") or the UTC designator "Z" directly after a digit --
# it is parsed as is. Otherwise +utc_offset+ is appended before parsing.
# Falls back to Engine.now when parsing raises.
def get_unixtime_with_utc_offset(timestamp, utc_offset)
  # "Z" counts as zone information too; appending a second offset to it would
  # leave the result to Time.parse's handling of two zone designators.
  if timestamp =~ /(?:(?<=\d)Z|[-+]\d{2}:?\d{2})\z/
    Time.parse(timestamp).to_i
  else
    Time.parse("#{timestamp} #{utc_offset}").to_i
  end
rescue StandardError
  Engine.now
end
183
+
184
# Dispatches CEF extension parsing to the strict or the rough parser.
# The strict parser is used only when @parse_strict_mode is exactly true.
def parse_cef_extension(text)
  return parse_cef_extension_without_strict_mode(text) unless @parse_strict_mode == true

  parse_cef_extension_with_strict_mode(text)
end
191
+
192
# Parses a CEF extension string, accepting only the keys in @keys_array.
#
# A "key=value" fragment whose key is not a known CEF key is treated as part
# of the preceding known key's value, e.g. with cs1, cs2 and cs3 known,
# "cs1=test cs2=foo hoge=fuga cs3=bar" gives cs2 => "foo hoge=fuga".
# Trailing whitespace is removed from every value, including the last one.
#
# Returns a Hash of key => value. Returns {} when the extension starts with
# an unknown key (there is no known key to attach it to) or when scanning
# raises.
def parse_cef_extension_with_strict_mode(text)
  record = {}
  last_valid_key_name = nil
  text.scan(@key_value_format_regexp) do |key, value|
    if @keys_array.include?(key)
      # Finish the previous value before storing the new one, so a repeated
      # key cannot strip the separator the new value still needs.
      record[last_valid_key_name].rstrip! unless last_valid_key_name.nil?
      record[key] = value
      last_valid_key_name = key
    else
      return {} if last_valid_key_name.nil?

      record[last_valid_key_name].concat("#{key}=#{value}")
    end
  end
  # The final value has no following key to trigger its clean-up.
  record[last_valid_key_name].rstrip! unless last_valid_key_name.nil?
  record
rescue StandardError
  {}
end
210
+
211
# Parses a CEF extension string without checking key names: every
# "key=value" fragment becomes its own entry, with trailing whitespace
# removed from the value. Returns {} when scanning raises.
def parse_cef_extension_without_strict_mode(text)
  text.scan(@key_value_format_regexp).each_with_object({}) do |(name, raw_value), fields|
    fields[name] = raw_value.rstrip
  end
rescue
  {}
end
220
+ end
221
+ end
222
+ end