fluent-plugin-netflow-enchanced 1.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,160 @@
1
+ require "bindata"
2
+
3
+ module Fluent
4
+ module Plugin
5
+ class NetflowParser < Parser
6
# BinData primitive that stores an IPv4 address as a big-endian 32-bit
# unsigned integer and exposes it as a string.
class IP4Addr < BinData::Primitive
  endian :big
  uint32 :storage

  # Stores +val+ (anything IPAddr.new accepts) as its integer form.
  # Raises ArgumentError when the parsed address is not IPv4.
  def set(val)
    address = IPAddr.new(val)
    raise ArgumentError, "invalid IPv4 address '#{val}'" unless address.ipv4?

    self.storage = address.to_i
  end

  # Returns the stored address rendered by IPAddr#to_s.
  def get
    packed = [self.storage].pack('N')
    IPAddr.new_ntoh(packed).to_s
  end
end
22
+
23
# BinData primitive that stores an IPv6 address as a big-endian 128-bit
# unsigned integer and exposes it as a string.
class IP6Addr < BinData::Primitive
  endian :big
  uint128 :storage

  # Stores +val+ (anything IPAddr.new accepts) as its integer form.
  # Raises ArgumentError when the parsed address is not IPv6.
  def set(val)
    address = IPAddr.new(val)
    raise ArgumentError, "invalid IPv6 address `#{val}'" unless address.ipv6?

    self.storage = address.to_i
  end

  # Splits the 128-bit value into eight 16-bit words, most significant
  # first, and lets IPAddr render them.
  def get
    words = 7.downto(0).map { |word| (self.storage >> (16 * word)) & 0xffff }
    IPAddr.new_ntoh(words.pack('n8')).to_s
  end
end
41
+
42
# BinData primitive for a 6-byte MAC address, exposed as colon-separated
# two-digit hexadecimal octets.
class MacAddr < BinData::Primitive
  array :bytes, type: :uint8, initial_length: 6

  # Parses a colon-separated hex string into its byte values.
  def set(val)
    self.bytes = val.split(/:/).map { |octet| octet.to_i(16) }
  end

  # Renders every stored byte as zero-padded lowercase hex joined by ":".
  def get
    octets = self.bytes.map do |byte|
      hex = byte.value.to_s(16)
      hex.rjust(2, '0')
    end
    octets.join(":")
  end
end
54
+
55
# BinData primitive for a 24-bit MPLS entry: 20-bit label, 3-bit exp and a
# 1-bit bottom flag. Only the label is exposed when the value is read.
class MplsLabel < BinData::Primitive
  bit20 :label
  bit3 :exp
  bit1 :bottom

  # Splits the integer +val+ into its three bit fields.
  def set(val)
    self.bottom = val & 0b1
    self.exp = (val >> 1) & 0b111
    self.label = val >> 4
  end

  # Returns the label part only; exp and bottom are not reported.
  def get
    self.label
  end
end
68
+
69
# Minimal record that reads only the leading big-endian 16-bit version field.
class Header < BinData::Record
  endian :big

  uint16 :version
end
73
+
74
# BinData description of a NetFlow v5 datagram: a fixed header followed by
# +flow_records+ fixed-size flow records. All fields are big-endian.
class Netflow5PDU < BinData::Record
  endian :big

  # --- header ---
  uint16 :version
  uint16 :flow_records
  uint32 :uptime
  uint32 :unix_sec
  uint32 :unix_nsec
  uint32 :flow_seq_num
  uint8  :engine_type
  uint8  :engine_id
  bit2   :sampling_algorithm
  bit14  :sampling_interval

  # --- one entry per flow; the count comes from the header ---
  array :records, initial_length: :flow_records do
    ip4_addr :ipv4_src_addr
    ip4_addr :ipv4_dst_addr
    ip4_addr :ipv4_next_hop
    uint16   :input_snmp
    uint16   :output_snmp
    uint32   :in_pkts
    uint32   :in_bytes
    uint32   :first_switched
    uint32   :last_switched
    uint16   :l4_src_port
    uint16   :l4_dst_port
    skip     length: 1 # one unread byte before the TCP flags
    uint8    :tcp_flags # Split up the TCP flags maybe?
    uint8    :protocol
    uint8    :src_tos
    uint16   :src_as
    uint16   :dst_as
    uint8    :src_mask
    uint8    :dst_mask
    skip     length: 2 # two unread bytes close each record
  end
end
109
+
110
# Body of a NetFlow v9 template flowset: a run of templates, each listing
# (field_type, field_length) pairs. +flowset_length+ is resolved from the
# enclosing record.
class TemplateFlowset < BinData::Record
  endian :big

  # Keep reading templates until the bytes consumed equal the flowset
  # length minus the 4 bytes of flowset_id + flowset_length.
  array :templates, read_until: -> { array.num_bytes == flowset_length - 4 } do
    uint16 :template_id
    uint16 :field_count
    array :template_fields, initial_length: :field_count do
      uint16 :field_type
      uint16 :field_length
    end
  end
end
121
+
122
# Body of a NetFlow v9 options-template flowset. Each template carries a
# scope section and an option section, both lists of
# (field_type, field_length) pairs whose byte sizes are given up front.
class OptionFlowset < BinData::Record
  endian :big

  # Stop once at most 2 bytes of the flowset body remain unread.
  array :templates, read_until: -> { flowset_length - 4 - array.num_bytes <= 2 } do
    uint16 :template_id
    uint16 :scope_length
    uint16 :option_length
    # Lengths are in bytes; every entry occupies 4 bytes (two uint16).
    array :scope_fields, initial_length: -> { scope_length / 4 } do
      uint16 :field_type
      uint16 :field_length
    end
    array :option_fields, initial_length: -> { option_length / 4 } do
      uint16 :field_type
      uint16 :field_length
    end
  end

  # Consume 2 trailing bytes when an odd number of templates was read.
  skip length: -> { templates.length.odd? ? 2 : 0 }
end
139
+
140
# BinData description of a NetFlow v9 datagram: a fixed header followed by
# flowsets that are read until end of input.
class Netflow9PDU < BinData::Record
  endian :big

  # --- header ---
  uint16 :version
  uint16 :flow_records
  uint32 :uptime
  uint32 :unix_sec
  uint32 :flow_seq_num
  uint32 :source_id

  # --- flowsets ---
  array :records, read_until: :eof do
    uint16 :flowset_id
    uint16 :flowset_length
    # flowset_id selects the parser: 0 => templates, 1 => option templates,
    # anything else is kept as a raw string of the remaining body bytes.
    choice :flowset_data, selection: :flowset_id do
      template_flowset 0
      option_flowset   1
      string :default, read_length: -> { flowset_length - 4 }
    end
  end
end
159
+ end
160
+ end
@@ -0,0 +1,403 @@
1
+ require "ipaddr"
2
+ require 'yaml'
3
+
4
+ require 'fluent/plugin/parser'
5
+
6
+ require_relative 'netflow_records'
7
+ require_relative 'vash'
8
+
9
+ module Fluent
10
+ module Plugin
11
+ # port from logstash's netflow parser
12
+ class NetflowParser < Parser
13
+ Fluent::Plugin.register_parser('netflow', self)
14
+
15
+ config_param :switched_times_from_uptime, :bool, default: false
16
+ config_param :cache_ttl, :integer, default: 4000
17
+ config_param :versions, :array, default: [5, 9]
18
+ config_param :definitions, :string, default: nil
19
+
20
+ # Cisco NetFlow Export Datagram Format
21
+ # http://www.cisco.com/c/en/us/td/docs/net_mgmt/netflow_collection_engine/3-6/user/guide/format.html
22
+ # Cisco NetFlow Version 9 Flow-Record Format
23
+ # http://www.cisco.com/en/US/technologies/tk648/tk362/technologies_white_paper09186a00800a3db9.html
24
+
25
# Sets up the template/sampler caches and loads the NetFlow v9 field
# definitions: first the bundled netflow_fields.yaml that sits next to this
# file, then (optionally) the user supplied +definitions+ file, whose
# entries are merged over the bundled 'option' definitions.
#
# Raises Fluent::ConfigError when a definitions file is missing or cannot
# be loaded.
def configure(conf)
  super

  @templates = Vash.new()
  @samplers_v9 = Vash.new()
  # Path to default Netflow v9 field definitions
  filename = File.expand_path('../netflow_fields.yaml', __FILE__)

  begin
    @template_fields = load_field_definitions(filename)
  rescue => e
    raise Fluent::ConfigError, "Bad syntax in definitions file #{filename}, error_class = #{e.class.name}, error = #{e.message}"
  end

  # Allow the user to augment/override/rename the supported Netflow fields
  if @definitions
    raise Fluent::ConfigError, "definitions file #{@definitions} doesn't exist" unless File.exist?(@definitions)
    begin
      @template_fields['option'].merge!(load_field_definitions(@definitions))
    rescue => e
      raise Fluent::ConfigError, "Bad syntax in definitions file #{@definitions}, error_class = #{e.class.name}, error = #{e.message}"
    end
  end
end

# Loads a field-definition YAML file.
#
# The definitions are expected to contain Symbols (netflow_field_for matches
# on :skip / :string). Psych 4 made YAML.load_file safe-by-default, which
# rejects Symbols, so the explicit unsafe loader is used when it exists and
# YAML.load_file (the historical behaviour) otherwise.
# NOTE(review): both files are operator-controlled configuration; do not
# point +definitions+ at untrusted input.
def load_field_definitions(path)
  if YAML.respond_to?(:unsafe_load_file)
    YAML.unsafe_load_file(path)
  else
    YAML.load_file(path)
  end
end
private :load_field_definitions
49
+
50
# Parser entry point: reads the version from the first two bytes of
# +payload+ and hands the datagram to the matching decoder. Events are
# delivered through +block+; +host+ is only used for v9 template keys.
# Any other version is logged and ignored.
def call(payload, host=nil, &block)
  version = payload[0, 2].unpack('n').first

  if version == 5
    forV5(payload, block)
  elsif version == 9
    handle_v9(host, Netflow9PDU.read(payload), block)
  else
    $log.warn "Unsupported Netflow version v#{version}: #{version.class}"
  end
end
63
+
64
+ private
65
+
66
# Renders a 32-bit integer as four dot-separated decimal octets, most
# significant octet first.
def ipv4_addr_to_string(uint32)
  [24, 16, 8, 0].map { |shift| (uint32 >> shift) & 0xff }.join('.')
end
69
+
70
# Converts a "milliseconds since boot" counter into a Time.
#
# msec              - counter value to convert
# uptime            - device uptime in milliseconds at export time
# current_unix_time - export time, whole seconds since the epoch
# current_nsec      - export time, nanosecond remainder
#
# The age of the event (uptime - msec) is subtracted from the export time
# with microsecond resolution.
def msec_from_boot_to_time(msec, uptime, current_unix_time, current_nsec)
  age_sec, age_msec = (uptime - msec).divmod(1000)
  seconds = current_unix_time - age_sec
  micros = (current_nsec / 1000) - (age_msec * 1000)

  # Borrow one second when the microsecond part went negative.
  unless micros >= 0
    seconds -= 1
    micros += 1000000
  end

  Time.at(seconds, micros)
end
80
+
81
# Formats +time+ in UTC with millisecond precision and a trailing "Z".
# Note: Time#utc converts the receiver in place.
def format_for_switched(time)
  utc_time = time.utc
  utc_time.strftime("%Y-%m-%dT%H:%M:%S.%3NZ".freeze)
end
84
+
85
# Formats +time+ in UTC with whole-second precision (no zone suffix).
# Note: Time#utc converts the receiver in place.
def format_for_flowSeconds(time)
  utc_time = time.utc
  utc_time.strftime("%Y-%m-%dT%H:%M:%S".freeze)
end
88
+
89
# Formats +time+ in UTC with millisecond precision and a trailing "Z".
# Note: Time#utc converts the receiver in place.
def format_for_flowMilliSeconds(time)
  utc_time = time.utc
  utc_time.strftime("%Y-%m-%dT%H:%M:%S.%3NZ".freeze)
end
92
+
93
# Formats +time+ in UTC with microsecond precision and a trailing "Z".
# Note: Time#utc converts the receiver in place.
def format_for_flowMicroSeconds(time)
  utc_time = time.utc
  utc_time.strftime("%Y-%m-%dT%H:%M:%S.%6NZ".freeze)
end
96
+
97
# Formats +time+ in UTC with nanosecond precision and a trailing "Z".
# Note: Time#utc converts the receiver in place.
def format_for_flowNanoSeconds(time)
  utc_time = time.utc
  utc_time.strftime("%Y-%m-%dT%H:%M:%S.%9NZ".freeze)
end
100
+
101
+ NETFLOW_V5_HEADER_FORMAT = 'nnNNNNnn'
102
+ NETFLOW_V5_HEADER_BYTES = 24
103
+ NETFLOW_V5_RECORD_FORMAT = 'NNNnnNNNNnnnnnnnxx'
104
+ NETFLOW_V5_RECORD_BYTES = 48
105
+
106
+ # V5 header
107
+ # uint16 :version # n
108
+ # uint16 :flow_records # n
109
+ # uint32 :uptime # N
110
+ # uint32 :unix_sec # N
111
+ # uint32 :unix_nsec # N
112
+ # uint32 :flow_seq_num # N
113
+ # uint8 :engine_type # n -> 0xff00
114
+ # uint8 :engine_id # -> 0x00ff
115
+ # bit2 :sampling_algorithm # n -> 0b1100000000000000
116
+ # bit14 :sampling_interval # -> 0b0011111111111111
117
+
118
+ # V5 records
119
+ # array :records, initial_length: :flow_records do
120
+ # ip4_addr :ipv4_src_addr # uint32 N
121
+ # ip4_addr :ipv4_dst_addr # uint32 N
122
+ # ip4_addr :ipv4_next_hop # uint32 N
123
+ # uint16 :input_snmp # n
124
+ # uint16 :output_snmp # n
125
+ # uint32 :in_pkts # N
126
+ # uint32 :in_bytes # N
127
+ # uint32 :first_switched # N
128
+ # uint32 :last_switched # N
129
+ # uint16 :l4_src_port # n
130
+ # uint16 :l4_dst_port # n
131
+ # skip length: 1 # n -> (ignored)
132
+ # uint8 :tcp_flags # -> 0x00ff
133
+ # uint8 :protocol # n -> 0xff00
134
+ # uint8 :src_tos # -> 0x00ff
135
+ # uint16 :src_as # n
136
+ # uint16 :dst_as # n
137
+ # uint8 :src_mask # n -> 0xff00
138
+ # uint8 :dst_mask # -> 0x00ff
139
+ # skip length: 2 # xx
140
+ # end
141
# Decodes a NetFlow v5 datagram and emits one event per flow record.
#
# payload - binary String holding the whole datagram (header + records)
# block   - callable invoked as block.call(time, record) for each record;
#           time is the header's unix_sec as an Integer and record is a
#           String-keyed Hash combining header and per-flow fields
#
# Logs a warning and returns nil without emitting anything when the
# datagram is shorter than the fixed header or when the number of whole
# records in the payload differs from the count announced in the header.
def forV5(payload, block)
  # Without this guard a short datagram makes unpack return nils and the
  # bit masks below raise NoMethodError.
  if payload.bytesize < NETFLOW_V5_HEADER_BYTES
    $log.warn "truncated v5 header, bytesize:#{payload.bytesize}, expected at least:#{NETFLOW_V5_HEADER_BYTES}"
    return
  end

  version, flow_records, uptime, unix_sec, unix_nsec, flow_seq_num, engine, sampling = payload.unpack(NETFLOW_V5_HEADER_FORMAT)
  # engine and sampling each pack two header fields into one 16-bit word.
  engine_type = (engine & 0xff00) >> 8
  engine_id = engine & 0x00ff
  sampling_algorithm = (sampling & 0b1100000000000000) >> 14
  sampling_interval = sampling & 0b0011111111111111

  time = Time.at(unix_sec, unix_nsec / 1000).to_i # TODO: Fluent::EventTime

  records_bytes = payload.bytesize - NETFLOW_V5_HEADER_BYTES

  if records_bytes / NETFLOW_V5_RECORD_BYTES != flow_records
    $log.warn "bytesize mismatch, records_bytes:#{records_bytes}, records:#{flow_records}"
    return
  end

  format_full = NETFLOW_V5_RECORD_FORMAT * flow_records
  objects = payload[NETFLOW_V5_HEADER_BYTES, records_bytes].unpack(format_full)

  # Every record unpacks into exactly 16 values (the two trailing pad
  # bytes are skipped by the format string).
  objects.each_slice(16) do |src_addr, dst_addr, next_hop, input_snmp, output_snmp,
                             in_pkts, in_bytes, first_switched, last_switched, l4_src_port, l4_dst_port,
                             tcp_flags_16, protocol_src_tos, src_as, dst_as, src_dst_mask|
    record = {
      "version" => version,
      "uptime" => uptime,
      "flow_records" => flow_records,
      "flow_seq_num" => flow_seq_num,
      "engine_type" => engine_type,
      "engine_id" => engine_id,
      "sampling_algorithm" => sampling_algorithm,
      "sampling_interval" => sampling_interval,

      "ipv4_src_addr" => ipv4_addr_to_string(src_addr),
      "ipv4_dst_addr" => ipv4_addr_to_string(dst_addr),
      "ipv4_next_hop" => ipv4_addr_to_string(next_hop),
      "input_snmp" => input_snmp,
      "output_snmp" => output_snmp,
      "in_pkts" => in_pkts,
      "in_bytes" => in_bytes,
      "first_switched" => first_switched,
      "last_switched" => last_switched,
      "l4_src_port" => l4_src_port,
      "l4_dst_port" => l4_dst_port,
      # The high byte of this word is padding; only the low byte is flags.
      "tcp_flags" => tcp_flags_16 & 0x00ff,
      "protocol" => (protocol_src_tos & 0xff00) >> 8,
      "src_tos" => (protocol_src_tos & 0x00ff),
      "src_as" => src_as,
      "dst_as" => dst_as,
      "src_mask" => (src_dst_mask & 0xff00) >> 8,
      "dst_mask" => (src_dst_mask & 0x00ff)
    }

    # By default the uptime-relative counters are replaced by absolute
    # timestamps; with switched_times_from_uptime the raw values are kept.
    unless @switched_times_from_uptime
      record["first_switched"] = format_for_switched(msec_from_boot_to_time(first_switched, uptime, unix_sec, unix_nsec))
      record["last_switched"] = format_for_switched(msec_from_boot_to_time(last_switched, uptime, unix_sec, unix_nsec))
    end

    block.call(time, record)
  end

  nil
end
201
+
202
# Walks every flowset of a parsed v9 PDU and routes it by flowset id:
# 0 => template, 1 => options template, 256..65535 => data. Any other id
# is logged as unsupported.
def handle_v9(host, pdu, block)
  pdu.records.each do |flowset|
    id = flowset.flowset_id
    case id
    when 0 then handle_v9_flowset_template(host, pdu, flowset)
    when 1 then handle_v9_flowset_options_template(host, pdu, flowset)
    when 256..65535 then handle_v9_flowset_data(host, pdu, flowset, block)
    else $log.warn 'Unsupported flowset', flowset_id: id
    end
  end
end
216
+
217
# Registers every template of a v9 template flowset in the template cache.
#
# Each template field is mapped to a BinData field definition through
# netflow_field_for; if any field yields no definition the whole template
# is skipped. The cache key is "host|source_id|template_id" and entries are
# stored with the configured cache_ttl.
def handle_v9_flowset_template(host, pdu, flowset)
  flowset.flowset_data.templates.each do |template|
    catch(:field) do
      template_fields = template.template_fields.map do |field|
        netflow_field_for(field.field_type, field.field_length) || throw(:field)
      end

      # Reaching this point means every field was resolved.
      key = "#{host}|#{pdu.source_id}|#{template.template_id}"
      @templates[key, @cache_ttl] = BinData::Struct.new(endian: :big, fields: template_fields)
      # Purge any expired templates
      @templates.cleanup!
    end
  end
end
235
+
236
+ NETFLOW_V9_FIELD_CATEGORIES = ['scope', 'option']
237
+
238
# Registers every template of a v9 options-template flowset in the
# template cache.
#
# Scope fields are resolved first, then option fields, each against its own
# definition category. If any field yields no definition the whole template
# is skipped. The cache key is "host|source_id|template_id" and entries are
# stored with the configured cache_ttl.
def handle_v9_flowset_options_template(host, pdu, flowset)
  flowset.flowset_data.templates.each do |template|
    catch(:field) do
      template_fields = NETFLOW_V9_FIELD_CATEGORIES.flat_map do |category|
        template["#{category}_fields"].map do |field|
          netflow_field_for(field.field_type, field.field_length, category) || throw(:field)
        end
      end

      # Reaching this point means every field was resolved.
      key = "#{host}|#{pdu.source_id}|#{template.template_id}"
      @templates[key, @cache_ttl] = BinData::Struct.new(endian: :big, fields: template_fields)
      # Purge any expired templates
      @templates.cleanup!
    end
  end
end
260
+
261
+ FIELDS_FOR_COPY_V9 = ['version', 'flow_seq_num']
262
+
263
# Decodes a v9 data flowset with the cached template that matches
# "host|source_id|flowset_id" and emits one event per record through
# block.call(time, event).
#
# Records that look like sampler definitions (see is_sampler?) are stored in
# the sampler cache instead of being emitted. Emitted events copy the PDU
# fields listed in FIELDS_FOR_COPY_V9, add 'flowset_id', and carry every
# record field under its String name; timestamp-like fields are converted:
#
# * first_switched / last_switched - absolute timestamp, or the raw
#   uptime-relative value when switched_times_from_uptime is set
# * flowStart*/flowEnd* Seconds/Milliseconds/Microseconds/Nanoseconds -
#   formatted timestamps of the matching precision
#
# Logs a warning and returns when no template is cached or the template
# size does not fit the flowset.
def handle_v9_flowset_data(host, pdu, flowset, block)
  template_key = "#{host}|#{pdu.source_id}|#{flowset.flowset_id}"
  template = @templates[template_key]
  unless template
    $log.warn 'No matching template for',
              host: host, source_id: pdu.source_id, flowset_id: flowset.flowset_id
    return
  end

  length = flowset.flowset_length - 4
  template_bytes = template.num_bytes

  # Template shouldn't be longer than the flowset and there should
  # be at most 3 padding bytes. A zero-sized template is rejected as well:
  # it would otherwise divide by zero below.
  if template_bytes <= 0 || template_bytes > length || !(length % template_bytes).between?(0, 3)
    $log.warn "Template length doesn't fit cleanly into flowset",
              template_id: flowset.flowset_id, template_length: template_bytes, flowset_length: length
    return
  end

  array = BinData::Array.new(type: template, initial_length: length / template_bytes)

  records = array.read(flowset.flowset_data)
  records.each do |r|
    if is_sampler?(r)
      sampler_key = "#{host}|#{pdu.source_id}|#{r.flow_sampler_id}"
      register_sampler_v9 sampler_key, r
      next
    end

    time = pdu.unix_sec # TODO: Fluent::EventTime (see: forV5)
    event = {}

    # Fewer fields in the v9 header
    FIELDS_FOR_COPY_V9.each do |f|
      event[f] = pdu[f]
    end

    event['flowset_id'] = flowset.flowset_id

    r.each_pair do |k, v|
      name = k.to_s
      case k
      when :first_switched, :last_switched
        raw = v.snapshot
        # Keep the uptime-relative value when asked to, like forV5 does,
        # instead of silently dropping the field.
        event[name] = if @switched_times_from_uptime
                        raw
                      else
                        format_for_switched(msec_from_boot_to_time(raw, pdu.uptime, time, 0))
                      end
      when :flowStartSeconds, :flowEndSeconds
        event[name] = format_for_flowSeconds(Time.at(v.snapshot, 0))
      when :flowStartMilliseconds, :flowEndMilliseconds
        seconds, millis = v.snapshot.divmod(1_000)
        event[name] = format_for_flowMilliSeconds(Time.at(seconds, millis * 1_000))
      when :flowStartMicroseconds, :flowEndMicroseconds
        seconds, micros = v.snapshot.divmod(1_000_000)
        event[name] = format_for_flowMicroSeconds(Time.at(seconds, micros))
      when :flowStartNanoseconds, :flowEndNanoseconds
        seconds, nanos = v.snapshot.divmod(1_000_000_000)
        # Time has no nsec= setter; pass the sub-second part as fractional
        # microseconds so nanosecond precision survives.
        event[name] = format_for_flowNanoSeconds(Time.at(seconds, Rational(nanos, 1_000)))
      else
        event[name] = v.snapshot
      end
    end

    # Fill in sampling details from a previously registered sampler.
    if sampler_id = r['flow_sampler_id']
      sampler_key = "#{host}|#{pdu.source_id}|#{sampler_id}"
      if sampler = @samplers_v9[sampler_key]
        event['sampling_algorithm'] ||= sampler['flow_sampler_mode']
        event['sampling_interval'] ||= sampler['flow_sampler_random_interval']
      end
    end

    block.call(time, event)
  end
end
362
+
363
# Builds the unsigned-integer type name for a field of +length+ bytes
# (e.g. 4 => :uint32); +default+ is used when +length+ is not positive.
def uint_field(length, default)
  byte_count = length > 0 ? length : default
  :"uint#{byte_count * 8}"
end
367
+
368
# Translates a template field (+type+, +length+) into a BinData field
# definition using the loaded definitions for +category+.
#
# Unknown types and non-Array definitions are logged and turned into a
# :skip entry of +length+ bytes. For known types the first element of the
# definition decides the result:
#
# * Integer - unsigned integer sized by +length+ (the Integer is the
#             fallback byte count), paired with the field name
# * :skip   - definition extended with nil and the byte length
# * :string - definition extended with length and trim_padding options
# * other   - the definition itself, unchanged
def netflow_field_for(type, length, category = 'option'.freeze)
  definition = @template_fields[category][type]

  if !definition
    $log.warn "Skip unsupported field", type: type, length: length
    return [:skip, nil, {length: length}]
  elsif !definition.is_a?(Array)
    $log.warn "Skip non-Array definition", field: definition
    return [:skip, nil, {length: length}]
  end

  # Small bit of fixup for numeric value, :skip or :string field length, which are dynamic
  kind = definition[0]
  return [uint_field(length, kind), definition[1]] if kind.is_a?(Integer)

  case kind
  when :skip then definition + [nil, {length: length}]
  when :string then definition + [{length: length, trim_padding: true}]
  else definition
  end
end
391
+
392
+ # covers Netflow v9 and v10 (a.k.a IPFIX)
393
# Tells whether +record+ describes a sampler, i.e. carries a sampler id,
# mode and random interval. Returns the first falsy value found, otherwise
# the random interval (callers only rely on truthiness).
def is_sampler?(record)
  sampler_id = record['flow_sampler_id']
  return sampler_id unless sampler_id

  mode = record['flow_sampler_mode']
  return mode unless mode

  record['flow_sampler_random_interval']
end
396
+
397
# Stores +sampler+ under +key+ in the v9 sampler cache with the configured
# cache_ttl, then runs the cache's cleanup.
def register_sampler_v9(key, sampler)
  @samplers_v9.tap { |cache| cache[key, @cache_ttl] = sampler }.cleanup!
end
401
+ end
402
+ end
403
+ end