fluent-plugin-netflow-enchanced 1.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,160 @@
1
+ require "bindata"
2
+
3
+ module Fluent
4
+ module Plugin
5
+ class NetflowParser < Parser
6
# BinData primitive backed by a big-endian uint32 and exposed as an
# IPv4 address string.
class IP4Addr < BinData::Primitive
  endian :big
  uint32 :storage

  # Parses +val+ with IPAddr.new and stores its integer form.
  # Raises ArgumentError when the parsed address is not IPv4.
  def set(val)
    address = IPAddr.new(val)
    raise ArgumentError, "invalid IPv4 address '#{val}'" unless address.ipv4?

    self.storage = address.to_i
  end

  # Returns the stored 32-bit value rendered as an address string.
  def get
    packed = [self.storage].pack('N')
    IPAddr.new_ntoh(packed).to_s
  end
end
22
+
23
# BinData primitive backed by a big-endian uint128 and exposed as an
# IPv6 address string.
class IP6Addr < BinData::Primitive
  endian :big
  uint128 :storage

  # Parses +val+ with IPAddr.new and stores its integer form.
  # Raises ArgumentError when the parsed address is not IPv6.
  def set(val)
    address = IPAddr.new(val)
    raise ArgumentError, "invalid IPv6 address `#{val}'" unless address.ipv6?

    self.storage = address.to_i
  end

  # Splits the 128-bit value into eight 16-bit words (most significant
  # first), packs them in network order and renders the address string.
  def get
    words = Array.new(8) do |index|
      (self.storage >> (112 - 16 * index)) & 0xffff
    end
    IPAddr.new_ntoh(words.pack('n8')).to_s
  end
end
41
+
42
# BinData primitive holding six uint8 values, exposed as a
# colon-separated lowercase hex string.
class MacAddr < BinData::Primitive
  array :bytes, type: :uint8, initial_length: 6

  # Splits +val+ on ':' and stores each part parsed as a base-16 integer.
  def set(val)
    self.bytes = val.split(':').map { |octet| octet.to_i(16) }
  end

  # Renders every stored byte as two hex digits joined by ':'.
  def get
    octets = self.bytes.map { |byte| byte.value.to_s(16).rjust(2, '0') }
    octets.join(':')
  end
end
54
+
55
# BinData primitive laid out as 20-bit label, 3-bit exp and 1-bit bottom
# flag. Only the label is exposed through #get.
class MplsLabel < BinData::Primitive
  bit20 :label
  bit3 :exp
  bit1 :bottom

  # Unpacks +val+: bit 0 is the bottom flag, bits 1..3 are exp and
  # everything above bit 3 is the label.
  def set(val)
    self.bottom = val & 1
    self.exp = (val >> 1) & 0b111
    self.label = val >> 4
  end

  # Returns the label portion only.
  def get
    self.label
  end
end
68
+
69
# Minimal record that reads only the leading big-endian 16-bit version
# field of a datagram.
class Header < BinData::Record
  endian :big

  uint16 :version
end
73
+
74
# BinData description of a NetFlow v5 datagram: a fixed header followed by
# +flow_records+ fixed-size flow records. All multi-byte fields are
# big-endian.
class Netflow5PDU < BinData::Record
  endian :big

  # --- header ---
  uint16 :version
  uint16 :flow_records
  uint32 :uptime
  uint32 :unix_sec
  uint32 :unix_nsec
  uint32 :flow_seq_num
  uint8 :engine_type
  uint8 :engine_id
  bit2 :sampling_algorithm
  bit14 :sampling_interval

  # --- flow records, one per count announced in the header ---
  array :records, initial_length: :flow_records do
    ip4_addr :ipv4_src_addr
    ip4_addr :ipv4_dst_addr
    ip4_addr :ipv4_next_hop
    uint16 :input_snmp
    uint16 :output_snmp
    uint32 :in_pkts
    uint32 :in_bytes
    uint32 :first_switched
    uint32 :last_switched
    uint16 :l4_src_port
    uint16 :l4_dst_port
    skip length: 1 # one unused byte before the TCP flags
    uint8 :tcp_flags # Split up the TCP flags maybe?
    uint8 :protocol
    uint8 :src_tos
    uint16 :src_as
    uint16 :dst_as
    uint8 :src_mask
    uint8 :dst_mask
    skip length: 2 # two unused trailing bytes
  end
end
109
+
110
# Body of a template flowset: a sequence of templates, each announcing
# +field_count+ (type, length) pairs. Reading stops once the bytes consumed
# equal +flowset_length+ minus 4; +flowset_length+ is resolved from the
# enclosing record.
class TemplateFlowset < BinData::Record
  endian :big

  array :templates, read_until: -> { array.num_bytes == flowset_length - 4 } do
    uint16 :template_id
    uint16 :field_count
    array :template_fields, initial_length: :field_count do
      uint16 :field_type
      uint16 :field_length
    end
  end
end
121
+
122
# Body of an options-template flowset. Each template carries scope and
# option field lists whose element counts are the announced byte lengths
# divided by 4 (every entry is two uint16 values).
class OptionFlowset < BinData::Record
  endian :big

  # Stop reading templates when at most 2 bytes of the flowset remain.
  array :templates, read_until: -> { flowset_length - 4 - array.num_bytes <= 2 } do
    uint16 :template_id
    uint16 :scope_length
    uint16 :option_length
    array :scope_fields, initial_length: -> { scope_length / 4 } do
      uint16 :field_type
      uint16 :field_length
    end
    array :option_fields, initial_length: -> { option_length / 4 } do
      uint16 :field_type
      uint16 :field_length
    end
  end

  # Skip 2 bytes when an odd number of templates was read, otherwise none.
  skip length: -> { templates.length.odd? ? 2 : 0 }
end
139
+
140
# BinData description of a NetFlow v9 datagram: a fixed header followed by
# flowsets read until end of input. Flowset id 0 is parsed as a template
# flowset, id 1 as an options-template flowset, and any other id is kept
# as a raw string of +flowset_length+ - 4 bytes.
class Netflow9PDU < BinData::Record
  endian :big

  # --- header ---
  uint16 :version
  uint16 :flow_records
  uint32 :uptime
  uint32 :unix_sec
  uint32 :flow_seq_num
  uint32 :source_id

  # --- flowsets ---
  array :records, read_until: :eof do
    uint16 :flowset_id
    uint16 :flowset_length
    choice :flowset_data, selection: :flowset_id do
      template_flowset 0
      option_flowset 1
      string :default, read_length: -> { flowset_length - 4 }
    end
  end
end
158
+ end
159
+ end
160
+ end
@@ -0,0 +1,403 @@
1
+ require "ipaddr"
2
+ require 'yaml'
3
+
4
+ require 'fluent/plugin/parser'
5
+
6
+ require_relative 'netflow_records'
7
+ require_relative 'vash'
8
+
9
+ module Fluent
10
+ module Plugin
11
+ # port from logstash's netflow parser
12
+ class NetflowParser < Parser
13
Fluent::Plugin.register_parser('netflow', self)

config_param :switched_times_from_uptime, :bool, default: false
config_param :cache_ttl, :integer, default: 4000
config_param :versions, :array, default: [5, 9]
config_param :definitions, :string, default: nil

# Cisco NetFlow Export Datagram Format
# http://www.cisco.com/c/en/us/td/docs/net_mgmt/netflow_collection_engine/3-6/user/guide/format.html
# Cisco NetFlow Version 9 Flow-Record Format
# http://www.cisco.com/en/US/technologies/tk648/tk362/technologies_white_paper09186a00800a3db9.html

# Sets up the template and sampler caches and loads the field definitions:
# first the netflow_fields.yaml that sits next to this file, then, if the
# +definitions+ parameter is set, that file merged into the 'option'
# category. Raises Fluent::ConfigError when either file cannot be loaded
# or the user-supplied file does not exist.
def configure(conf)
  super

  @templates = Vash.new
  @samplers_v9 = Vash.new

  # Path to default Netflow v9 field definitions
  bundled_path = File.expand_path('../netflow_fields.yaml', __FILE__)
  @template_fields =
    begin
      YAML.load_file(bundled_path)
    rescue => e
      raise Fluent::ConfigError, "Bad syntax in definitions file #{bundled_path}, error_class = #{e.class.name}, error = #{e.message}"
    end

  # Allow the user to augment/override/rename the supported Netflow fields
  return unless @definitions

  unless File.exist?(@definitions)
    raise Fluent::ConfigError, "definitions file #{@definitions} doesn't exist"
  end

  begin
    @template_fields['option'].merge!(YAML.load_file(@definitions))
  rescue => e
    raise Fluent::ConfigError, "Bad syntax in definitions file #{@definitions}, error_class = #{e.class.name}, error = #{e.message}"
  end
end
49
+
50
# Entry point: reads the big-endian 16-bit version from the first two
# bytes of +payload+ and dispatches to the v5 or v9 handler. Any other
# version is logged as unsupported. +block+ receives (time, record) for
# every decoded flow; +host+ is forwarded to the v9 handler.
def call(payload, host=nil, &block)
  version, = payload[0, 2].unpack('n')

  if version == 5
    forV5(payload, block)
  elsif version == 9
    handle_v9(host, Netflow9PDU.read(payload), block)
  else
    $log.warn "Unsupported Netflow version v#{version}: #{version.class}"
  end
end
63
+
64
+ private
65
+
66
# Renders the low 32 bits of +uint32+ as a dotted-quad string, most
# significant byte first.
def ipv4_addr_to_string(uint32)
  [24, 16, 8, 0].map { |shift| (uint32 >> shift) & 0xff }.join('.')
end
69
+
70
# Converts a milliseconds-since-boot value into a Time.
#
# +msec+ is the value to convert, +uptime+ the uptime in milliseconds at
# export, and +current_unix_time+ / +current_nsec+ the export wall clock
# (seconds and nanoseconds). The result is the export time moved back by
# (+uptime+ - +msec+) milliseconds.
def msec_from_boot_to_time(msec, uptime, current_unix_time, current_nsec)
  elapsed_ms = uptime - msec
  secs = current_unix_time - (elapsed_ms / 1000)
  usecs = (current_nsec / 1000) - ((elapsed_ms % 1000) * 1000)

  # Borrow one second when the microsecond part went negative.
  secs, usecs = secs - 1, usecs + 1000000 if usecs < 0

  Time.at(secs, usecs)
end
80
+
81
# Formats +time+ as a UTC ISO-8601 string with millisecond precision and
# a trailing "Z".
#
# Uses Time#getutc rather than Time#utc so the caller's Time object is
# not converted to UTC in place.
def format_for_switched(time)
  time.getutc.strftime("%Y-%m-%dT%H:%M:%S.%3NZ".freeze)
end
84
+
85
# Formats +time+ as a UTC date-time string with second precision. Unlike
# the sub-second formatters, this format has no trailing "Z".
#
# Uses Time#getutc rather than Time#utc so the caller's Time object is
# not converted to UTC in place.
def format_for_flowSeconds(time)
  time.getutc.strftime("%Y-%m-%dT%H:%M:%S".freeze)
end
88
+
89
# Formats +time+ as a UTC ISO-8601 string with millisecond precision and
# a trailing "Z".
#
# Uses Time#getutc rather than Time#utc so the caller's Time object is
# not converted to UTC in place.
def format_for_flowMilliSeconds(time)
  time.getutc.strftime("%Y-%m-%dT%H:%M:%S.%3NZ".freeze)
end
92
+
93
# Formats +time+ as a UTC ISO-8601 string with microsecond precision and
# a trailing "Z".
#
# Uses Time#getutc rather than Time#utc so the caller's Time object is
# not converted to UTC in place.
def format_for_flowMicroSeconds(time)
  time.getutc.strftime("%Y-%m-%dT%H:%M:%S.%6NZ".freeze)
end
96
+
97
# Formats +time+ as a UTC ISO-8601 string with nanosecond precision and
# a trailing "Z".
#
# Uses Time#getutc rather than Time#utc so the caller's Time object is
# not converted to UTC in place.
def format_for_flowNanoSeconds(time)
  time.getutc.strftime("%Y-%m-%dT%H:%M:%S.%9NZ".freeze)
end
100
+
101
NETFLOW_V5_HEADER_FORMAT = 'nnNNNNnn'.freeze
NETFLOW_V5_HEADER_BYTES = 24
NETFLOW_V5_RECORD_FORMAT = 'NNNnnNNNNnnnnnnnxx'.freeze
NETFLOW_V5_RECORD_BYTES = 48
# Number of values String#unpack yields per record with the format above.
NETFLOW_V5_RECORD_FIELDS = 16

# V5 header layout and the unpack directive that covers each field:
#   uint16 version              n
#   uint16 flow_records         n
#   uint32 uptime               N
#   uint32 unix_sec             N
#   uint32 unix_nsec            N
#   uint32 flow_seq_num         N
#   uint8  engine_type          n -> 0xff00
#   uint8  engine_id              -> 0x00ff
#   bit2   sampling_algorithm   n -> 0b1100000000000000
#   bit14  sampling_interval      -> 0b0011111111111111
#
# V5 record layout (repeated flow_records times):
#   ip4_addr ipv4_src_addr      N
#   ip4_addr ipv4_dst_addr      N
#   ip4_addr ipv4_next_hop      N
#   uint16 input_snmp           n
#   uint16 output_snmp          n
#   uint32 in_pkts              N
#   uint32 in_bytes             N
#   uint32 first_switched       N
#   uint32 last_switched        N
#   uint16 l4_src_port          n
#   uint16 l4_dst_port          n
#   (1 byte skipped)            n -> high byte ignored
#   uint8  tcp_flags              -> 0x00ff
#   uint8  protocol             n -> 0xff00
#   uint8  src_tos                -> 0x00ff
#   uint16 src_as               n
#   uint16 dst_as               n
#   uint8  src_mask             n -> 0xff00
#   uint8  dst_mask               -> 0x00ff
#   (2 bytes skipped)           xx

# Decodes a NetFlow v5 datagram with String#unpack and calls
# +block+ with (time, record) once per flow record, where +time+ is the
# header's unix_sec and +record+ is a Hash with string keys combining the
# header fields and the flow fields.
#
# A payload shorter than the fixed header, or one whose record area does
# not match the announced record count, is logged and skipped.
# first_switched / last_switched are converted to formatted timestamps
# unless @switched_times_from_uptime is set, in which case the raw
# values are emitted.
def forV5(payload, block)
  # A short payload would unpack to nils and fail on the bit masks below.
  if payload.bytesize < NETFLOW_V5_HEADER_BYTES
    $log.warn "truncated Netflow v5 header, bytesize:#{payload.bytesize}"
    return
  end

  version, flow_records, uptime, unix_sec, unix_nsec, flow_seq_num, engine, sampling =
    payload.unpack(NETFLOW_V5_HEADER_FORMAT)

  time = Time.at(unix_sec, unix_nsec / 1000).to_i # TODO: Fluent::EventTime

  records_bytes = payload.bytesize - NETFLOW_V5_HEADER_BYTES
  if records_bytes / NETFLOW_V5_RECORD_BYTES != flow_records
    $log.warn "bytesize mismatch, records_bytes:#{records_bytes}, records:#{flow_records}"
    return
  end

  # Header values are identical for every record of the datagram.
  header_fields = {
    "version" => version,
    "uptime" => uptime,
    "flow_records" => flow_records,
    "flow_seq_num" => flow_seq_num,
    "engine_type" => (engine & 0xff00) >> 8,
    "engine_id" => engine & 0x00ff,
    "sampling_algorithm" => (sampling & 0b1100000000000000) >> 14,
    "sampling_interval" => sampling & 0b0011111111111111
  }

  values = payload[NETFLOW_V5_HEADER_BYTES, records_bytes].unpack(NETFLOW_V5_RECORD_FORMAT * flow_records)

  values.each_slice(NETFLOW_V5_RECORD_FIELDS) do |src_addr, dst_addr, next_hop, input_snmp, output_snmp,
                                                  in_pkts, in_bytes, first_switched, last_switched,
                                                  l4_src_port, l4_dst_port, tcp_flags_16, protocol_src_tos,
                                                  src_as, dst_as, src_dst_mask|
    unless @switched_times_from_uptime
      first_switched = format_for_switched(msec_from_boot_to_time(first_switched, uptime, unix_sec, unix_nsec))
      last_switched = format_for_switched(msec_from_boot_to_time(last_switched, uptime, unix_sec, unix_nsec))
    end

    record = header_fields.merge(
      "ipv4_src_addr" => ipv4_addr_to_string(src_addr),
      "ipv4_dst_addr" => ipv4_addr_to_string(dst_addr),
      "ipv4_next_hop" => ipv4_addr_to_string(next_hop),
      "input_snmp" => input_snmp,
      "output_snmp" => output_snmp,
      "in_pkts" => in_pkts,
      "in_bytes" => in_bytes,
      "first_switched" => first_switched,
      "last_switched" => last_switched,
      "l4_src_port" => l4_src_port,
      "l4_dst_port" => l4_dst_port,
      "tcp_flags" => tcp_flags_16 & 0x00ff,
      "protocol" => (protocol_src_tos & 0xff00) >> 8,
      "src_tos" => (protocol_src_tos & 0x00ff),
      "src_as" => src_as,
      "dst_as" => dst_as,
      "src_mask" => (src_dst_mask & 0xff00) >> 8,
      "dst_mask" => (src_dst_mask & 0x00ff)
    )

    block.call(time, record)
  end

  nil
end
201
+
202
# Walks every flowset of a parsed v9 PDU and routes it by flowset id:
# 0 to the template handler, 1 to the options-template handler,
# 256..65535 to the data handler. Any other id is logged as unsupported.
def handle_v9(host, pdu, block)
  pdu.records.each do |flowset|
    case flowset.flowset_id
    when 0 then handle_v9_flowset_template(host, pdu, flowset)
    when 1 then handle_v9_flowset_options_template(host, pdu, flowset)
    when 256..65535 then handle_v9_flowset_data(host, pdu, flowset, block)
    else $log.warn 'Unsupported flowset', flowset_id: flowset.flowset_id
    end
  end
end
216
+
217
# Registers every template of a template flowset in the @templates cache
# under "host|source_id|template_id" with a TTL of @cache_ttl. A template
# is skipped as soon as netflow_field_for returns a falsy entry for one
# of its fields.
def handle_v9_flowset_template(host, pdu, flowset)
  flowset.flowset_data.templates.each do |template|
    definitions = []
    template.template_fields.each do |field|
      definition = netflow_field_for(field.field_type, field.field_length)
      unless definition
        definitions = nil
        break
      end
      definitions << definition
    end
    next if definitions.nil?

    # We get this far, we have a list of fields
    cache_key = "#{host}|#{pdu.source_id}|#{template.template_id}"
    @templates[cache_key, @cache_ttl] = BinData::Struct.new(endian: :big, fields: definitions)
    # Purge any expired templates
    @templates.cleanup!
  end
end
235
+
236
NETFLOW_V9_FIELD_CATEGORIES = %w[scope option]

# Registers every template of an options-template flowset in the
# @templates cache under "host|source_id|template_id" with a TTL of
# @cache_ttl. Scope fields are resolved first, then option fields, each
# against its own definition category. A template is skipped as soon as
# netflow_field_for returns a falsy entry for one of its fields.
def handle_v9_flowset_options_template(host, pdu, flowset)
  flowset.flowset_data.templates.each do |template|
    # A bare `throw :field` makes catch return nil, marking the template unusable.
    definitions = catch(:field) do
      NETFLOW_V9_FIELD_CATEGORIES.each_with_object([]) do |category, collected|
        template["#{category}_fields"].each do |field|
          definition = netflow_field_for(field.field_type, field.field_length, category)
          throw :field unless definition

          collected << definition
        end
      end
    end
    next unless definitions

    # We get this far, we have a list of fields
    cache_key = "#{host}|#{pdu.source_id}|#{template.template_id}"
    @templates[cache_key, @cache_ttl] = BinData::Struct.new(endian: :big, fields: definitions)
    # Purge any expired templates
    @templates.cleanup!
  end
end
260
+
261
FIELDS_FOR_COPY_V9 = %w[version flow_seq_num].freeze

# Decodes one v9 data flowset with the cached template stored under
# "host|source_id|flowset_id" and calls +block+ with (time, event) for
# every record, where +time+ is the PDU's unix_sec and +event+ is a Hash
# with string keys.
#
# The flowset is logged and skipped when no template is cached, when the
# template describes zero bytes, when it is longer than the flowset, or
# when more than 3 bytes would be left over after the last whole record.
# Records that look like sampler definitions (see is_sampler?) are stored
# in the sampler cache instead of being emitted.
#
# Field conversions:
# * first_switched / last_switched become formatted timestamps, or keep
#   their raw value when @switched_times_from_uptime is set (as the v5
#   path does).
# * flowStart*/flowEnd* Seconds, Milliseconds, Microseconds and
#   Nanoseconds become formatted UTC timestamps of matching precision.
# * Every other field is copied as its snapshot value.
def handle_v9_flowset_data(host, pdu, flowset, block)
  template_key = "#{host}|#{pdu.source_id}|#{flowset.flowset_id}"
  template = @templates[template_key]
  unless template
    $log.warn 'No matching template for',
              host: host, source_id: pdu.source_id, flowset_id: flowset.flowset_id
    return
  end

  length = flowset.flowset_length - 4
  record_bytes = template.num_bytes

  # Template shouldn't be longer than the flowset and there should
  # be at most 3 padding bytes. A zero-byte template is rejected up
  # front because the modulo below would raise ZeroDivisionError.
  if record_bytes.zero? || record_bytes > length || !(length % record_bytes).between?(0, 3)
    $log.warn "Template length doesn't fit cleanly into flowset",
              template_id: flowset.flowset_id, template_length: record_bytes, flowset_length: length
    return
  end

  array = BinData::Array.new(type: template, initial_length: length / record_bytes)

  array.read(flowset.flowset_data).each do |r|
    if is_sampler?(r)
      sampler_key = "#{host}|#{pdu.source_id}|#{r.flow_sampler_id}"
      register_sampler_v9 sampler_key, r
      next
    end

    time = pdu.unix_sec # TODO: Fluent::EventTime (see: forV5)
    event = {}

    # Fewer fields in the v9 header
    FIELDS_FOR_COPY_V9.each { |f| event[f] = pdu[f] }
    event['flowset_id'] = flowset.flowset_id

    r.each_pair do |k, v|
      value = v.snapshot
      event[k.to_s] =
        case k
        when :first_switched, :last_switched
          if @switched_times_from_uptime
            value
          else
            format_for_switched(msec_from_boot_to_time(value, pdu.uptime, time, 0))
          end
        when :flowStartSeconds, :flowEndSeconds
          format_for_flowSeconds(Time.at(value, 0))
        when :flowStartMilliseconds, :flowEndMilliseconds
          format_for_flowMilliSeconds(Time.at(value / 1_000, (value % 1_000) * 1_000))
        when :flowStartMicroseconds, :flowEndMicroseconds
          format_for_flowMicroSeconds(Time.at(value / 1_000_000, value % 1_000_000))
        when :flowStartNanoseconds, :flowEndNanoseconds
          # Time has no nsec= setter; pass the sub-second part to Time.at
          # as fractional microseconds so nanosecond precision survives.
          nanoseconds = value % 1_000_000_000
          format_for_flowNanoSeconds(Time.at(value / 1_000_000_000, Rational(nanoseconds, 1_000)))
        else
          value
        end
    end

    sampler_id = r['flow_sampler_id']
    if sampler_id
      sampler = @samplers_v9["#{host}|#{pdu.source_id}|#{sampler_id}"]
      if sampler
        event['sampling_algorithm'] ||= sampler['flow_sampler_mode']
        event['sampling_interval'] ||= sampler['flow_sampler_random_interval']
      end
    end

    block.call(time, event)
  end
end
362
+
363
# Builds the unsigned-integer type symbol for a field of +length+ bytes,
# e.g. 4 gives :uint32. A +length+ of zero or less falls back to
# +default+ bytes.
def uint_field(length, default)
  byte_count = length > 0 ? length : default
  :"uint#{byte_count * 8}"
end
367
+
368
# Looks up the definition for field +type+ in @template_fields[category]
# and returns a field entry for it. Unknown types and definitions that
# are not Arrays are logged and turned into a :skip entry of +length+
# bytes, so this method always returns an Array.
#
# The definition's first element decides the fix-up:
# * Integer: replaced by the uint type for +length+ bytes (the Integer is
#   the default byte count when +length+ is 0).
# * :skip: gets a nil name and the length appended.
# * :string: gets the length and trim_padding options appended.
# * anything else: returned unchanged.
def netflow_field_for(type, length, category = 'option'.freeze)
  definition = @template_fields[category][type]

  if !definition
    $log.warn "Skip unsupported field", type: type, length: length
    return [:skip, nil, {length: length}]
  end

  if !definition.is_a?(Array)
    $log.warn "Skip non-Array definition", field: definition
    return [:skip, nil, {length: length}]
  end

  # Small bit of fixup for numeric value, :skip or :string field length, which are dynamic
  kind = definition[0]
  if kind.is_a?(Integer)
    [uint_field(length, kind), definition[1]]
  elsif kind == :skip
    definition + [nil, {length: length}]
  elsif kind == :string
    definition + [{length: length, trim_padding: true}]
  else
    definition
  end
end
391
+
392
# covers Netflow v9 and v10 (a.k.a IPFIX)
#
# Truthy when +record+ has all three of flow_sampler_id,
# flow_sampler_mode and flow_sampler_random_interval. Returns the first
# falsy lookup, or the random-interval value when all are present.
def is_sampler?(record)
  sampler_id = record['flow_sampler_id']
  return sampler_id unless sampler_id

  sampler_mode = record['flow_sampler_mode']
  return sampler_mode unless sampler_mode

  record['flow_sampler_random_interval']
end
396
+
397
# Stores +sampler+ in the v9 sampler cache under +key+ with a TTL of
# @cache_ttl, then asks the cache to clean itself up. Returns whatever
# cleanup! returns.
def register_sampler_v9(key, sampler)
  cache = @samplers_v9
  cache[key, @cache_ttl] = sampler
  cache.cleanup!
end
401
+ end
402
+ end
403
+ end