fluent-plugin-netflow 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ module Fluent
2
+ class TextParser
3
+ class NetflowParser < Parser
4
+ # https://gist.github.com/joshaven/184837
5
# Volatile hash: a Hash whose entries expire a number of seconds after they
# are written (60 unless a TTL is given). Expired entries are dropped lazily
# when read through #[] or eagerly through #cleanup!.
#
# Keys are normalized before every read, write and clear: a trailing "!" and
# then a trailing "=" are stripped; String keys stay Strings and every other
# key is converted to a Symbol via #to_s. Only the methods overridden here
# normalize keys and honour expiry; inherited Hash methods (fetch, key?,
# each, ...) work on the stored, already-normalized keys and ignore expiry.
#
# Based on https://gist.github.com/joshaven/184837
class Vash < Hash
  # @param constructor [Hash, Object] a Hash seeds the initial entries (each
  #   with the default TTL); anything else is passed to Hash#initialize as the
  #   default value.
  def initialize(constructor = {})
    @register ||= {} # normalized key => expiry time (epoch seconds)
    if constructor.is_a?(Hash)
      super()
      merge(constructor)
    else
      super(constructor)
    end
  end

  # Keep handles on the plain Hash accessors. The guard stops a re-opened
  # class from aliasing the overrides below onto themselves.
  alias_method :regular_writer, :[]= unless method_defined?(:regular_writer)
  alias_method :regular_reader, :[] unless method_defined?(:regular_reader)

  # Reads the value stored under +key+, removing the entry first when its TTL
  # has elapsed. Keys that were never written count as expired, so the lookup
  # falls through to the hash default.
  def [](key)
    key = sterile(key)
    clear(key) if expired?(key)
    regular_reader(key)
  end

  # Stores a value with a time-to-live.
  #
  #   vash[:key] = value        # expires after 60 seconds
  #   vash[:key, ttl] = value   # expires after +ttl+ seconds
  #
  # @raise [ArgumentError] when called with neither of the two shapes above
  def []=(key, *args)
    case args.length
    when 2
      # Index assignment passes the extra index first and the value last.
      lifetime, value = args
    when 1
      value = args[0]
      lifetime = 60
    else
      raise ArgumentError, "Wrong number of arguments, expected 2 or 3, received: #{args.length+1}\n"+
                           "Example Usage: volatile_hash[:key]=value OR volatile_hash[:key, ttl]=value"
    end
    key = sterile(key)
    ttl(key, lifetime)
    regular_writer(key, value)
  end

  # Copies every entry of +hsh+ into this hash (in place, unlike Hash#merge),
  # giving each the default TTL.
  #
  # @return [Vash] self
  def merge(hsh)
    # Values are read through hsh[key] so that a Vash source applies its own
    # expiry rules; #[]= takes care of key normalization.
    hsh.each { |key, _value| self[key] = hsh[key] }
    self
  end

  # Removes every entry whose expiry time lies in the past.
  #
  # @return [Array] one element per registered key: the result of #clear for
  #   entries that were removed, nil for entries that were kept
  def cleanup!
    now = Time.now.to_i
    # Iterate over a snapshot: #clear deletes from @register.
    @register.to_a.map { |key, expires_at| clear(key) if expires_at < now }
  end

  # Removes a single entry together with its expiry record.
  # NOTE: this shadows the zero-argument Hash#clear.
  #
  # @return [Object, nil] the result of Hash#delete for the normalized key
  def clear(key)
    key = sterile(key)
    @register.delete key
    self.delete key
  end

  private

  # True when +key+ has no expiry record or its expiry time has passed.
  def expired?(key)
    Time.now.to_i > @register[key].to_i
  end

  # Records that +key+ expires +secs+ seconds from now.
  def ttl(key, secs=60)
    @register[key] = Time.now.to_i + secs.to_i
  end

  # Returns the normalized form of +key+ (see the class comment).
  def sterile(key)
    String === key ? key.chomp('!').chomp('=') : key.to_s.chomp('!').chomp('=').to_sym
  end

  # Returns the normalized form of +key+. It cannot rewrite the caller's
  # variable, so callers must use the return value.
  def sterilize(key)
    sterile(key)
  end
end
73
+ end
74
+ end
75
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,26 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+
12
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
13
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
14
+ require 'fluent/test'
15
+
16
+ # $log = Fluent::Log.new(Fluent::Test::DummyLogDevice.new, Fluent::Log::LEVEL_INFO)
17
+
18
+ require 'fluent/plugin/parser_netflow'
19
+ require 'fluent/plugin/in_netflow'
20
+
21
# Finds a port number that is currently free by binding a throw-away TCP
# server to port 0 (the OS picks the port) and reading back what was assigned.
# The probe socket is always closed, even if reading the address fails.
#
# NOTE(review): the port is probed over TCP and released before the caller
# binds it, so this is best-effort only — confirm that is acceptable for the
# protocol the caller actually uses.
#
# @return [Integer] the port number the OS assigned to the probe socket
def unused_port
  probe = TCPServer.open(0)
  probe.addr[1]
ensure
  probe.close if probe
end
Binary file
@@ -0,0 +1,32 @@
1
+ require 'helper'
2
+
3
# Configuration tests for Fluent::NetflowInput.
class NetflowInputTest < Test::Unit::TestCase
  # Port picked once, when this class is loaded.
  PORT = unused_port

  # Baseline plugin configuration used by every test.
  CONFIG = %[
    port #{PORT}
    bind 127.0.0.1
    tag test.netflow
  ]

  # Runs Fluentd's test setup before each test.
  def setup
    Fluent::Test.setup
  end

  # Builds an input test driver for the plugin and configures it with +conf+.
  # Returns whatever #configure returns; the tests use it as the driver.
  def create_driver(conf=CONFIG)
    driver = Fluent::Test::InputTestDriver.new(Fluent::NetflowInput)
    driver.configure(conf)
  end

  # The baseline configuration is reflected on the plugin instance, the
  # protocol defaults to UDP, and asking for TCP is rejected.
  def test_configure
    driver = create_driver
    assert_equal PORT, driver.instance.port
    assert_equal '127.0.0.1', driver.instance.bind
    assert_equal 'test.netflow', driver.instance.tag
    assert_equal :udp, driver.instance.protocol_type

    tcp_config = CONFIG + %[
      protocol_type tcp
    ]
    assert_raise(Fluent::ConfigError) { create_driver(tcp_config) }
  end
end
@@ -0,0 +1,379 @@
1
+ require 'helper'
2
+
3
+ class NetflowParserTest < Test::Unit::TestCase
4
# Per-test hook: runs Fluentd's test setup.
def setup
  Fluent::Test.setup
end
7
+
8
# Builds a NetflowParser and configures it from +conf+, a Hash of parameter
# name => value that is wrapped in a 'ROOT' config element.
#
# @return the configured parser
def create_parser(conf={})
  root = Fluent::Config::Element.new('ROOT', '', conf, [])
  Fluent::TextParser::NetflowParser.new.tap { |parser| parser.configure(root) }
end
13
+
14
# A parser with an empty configuration must configure without raising.
test 'configure' do
  assert_nothing_raised { create_parser }
end
19
+
20
# Parses a captured NetFlow v5 datagram (a single flow record) and checks the
# event time and every field of the resulting record.
test 'parse v5 binary data, dumped by netflow-generator' do
  # generated by https://github.com/mshindo/NetFlow-Generator
  parser = create_parser
  # The dump is raw bytes: read it in binary mode and measure it in bytes so
  # neither newline translation nor the default external encoding can alter
  # the payload or the length check.
  raw_data = File.binread(File.expand_path('../netflow.v5.dump', __FILE__))
  bytes_for_1record = 72
  assert_equal bytes_for_1record, raw_data.bytesize
  parsed = []
  parser.call(raw_data) do |time, data|
    parsed << [time, data]
  end
  assert_equal 1, parsed.size
  assert_equal Time.parse('2016-02-29 11:14:00 -0800').to_i, parsed.first[0]
  expected_record = {
    # header
    "version" => 5,
    "uptime" => 1785097000,
    "flow_records" => 1,
    "flow_seq_num" => 1,
    "engine_type" => 1,
    "engine_id" => 1,
    "sampling_algorithm" => 0,
    "sampling_interval" => 0,

    # record
    "ipv4_src_addr" => "10.0.0.11",
    "ipv4_dst_addr" => "20.0.0.187",
    "ipv4_next_hop" => "30.0.0.254",
    "input_snmp" => 1,
    "output_snmp" => 2,
    "in_pkts" => 173,
    "in_bytes" => 4581,
    "first_switched" => "2016-02-29T19:13:59.215Z",
    "last_switched" => "2016-02-29T19:14:00.090Z",
    "l4_src_port" => 1001,
    "l4_dst_port" => 3001,
    "tcp_flags" => 27,
    "protocol" => 6,
    "src_tos" => 0,
    "src_as" => 101,
    "dst_as" => 201,
    "src_mask" => 24,
    "dst_mask" => 24,
  }
  assert_equal expected_record, parsed.first[1]
end
65
+
66
# Exporter uptime used by the generated test PDUs, in milliseconds:
# 12 days 3 hours 13 minutes 3 seconds 625 milliseconds
# == (((12 * 24 + 3) * 60 + 13) * 60 + 3) * 1000 + 625
DEFAULT_UPTIME = 1_048_383_625

# Export timestamp of the generated PDUs: whole seconds since the epoch ...
DEFAULT_TIME = Time.parse('2016-02-29 11:14:00 -0800').to_i
# ... plus a nanosecond part that is picked at random on every load.
DEFAULT_NSEC = rand(1_000_000_000)
71
+
72
# Converts a "milliseconds since boot" value into an absolute Time using exact
# Rational arithmetic.
#
# @param msec [Integer] uptime-relative timestamp, in milliseconds
# @param uptime [Integer] uptime at export time, in milliseconds
# @param sec [Integer] export time, whole seconds since the epoch
# @param nsec [Integer] export time, nanosecond part
# @return [Time] the export time minus (uptime - msec) milliseconds
def msec_from_boot_to_time_by_rational(msec, uptime: DEFAULT_UPTIME, sec: DEFAULT_TIME, nsec: DEFAULT_NSEC)
  elapsed = Rational(uptime - msec, 1_000)
  exported_at = Rational(sec) + Rational(nsec, 1_000_000_000)
  Time.at(exported_at - elapsed)
end
78
+
79
# Converts a "milliseconds since boot" value into an absolute Time using
# integer arithmetic only (microsecond resolution).
#
# @param msec [Integer] uptime-relative timestamp, in milliseconds
# @param uptime [Integer] uptime at export time, in milliseconds
# @param sec [Integer] export time, whole seconds since the epoch
# @param nsec [Integer] export time, nanosecond part
# @return [Time] the export time minus (uptime - msec) milliseconds
def msec_from_boot_to_time(msec, uptime: DEFAULT_UPTIME, sec: DEFAULT_TIME, nsec: DEFAULT_NSEC)
  elapsed_ms = uptime - msec
  whole_seconds = sec - (elapsed_ms / 1000)
  usec = (nsec / 1000) - ((elapsed_ms % 1000) * 1000)
  # A negative microsecond part borrows one second.
  return Time.at(whole_seconds, usec) unless usec < 0

  Time.at(whole_seconds - 1, usec + 1000000)
end
89
+
90
# Formats +time+ as a UTC timestamp with millisecond precision, e.g.
# "2016-02-29T19:13:59.215Z".
#
# Uses Time#getutc, which returns a converted copy, so the caller's Time
# object keeps its own zone (Time#utc would convert it in place).
#
# @param time [Time]
# @return [String]
def format_for_switched(time)
  time.getutc.strftime("%Y-%m-%dT%H:%M:%S.%3NZ")
end
93
+
94
# The integer-based and the Rational-based conversions must agree on both the
# second and the microsecond part.
test 'converting msec from boottime to time works correctly' do
  by_integer = msec_from_boot_to_time(300)
  by_rational = msec_from_boot_to_time_by_rational(300)

  assert_equal by_integer.to_i, by_rational.to_i
  assert_equal by_integer.usec, by_rational.usec
end
98
+
99
# Benchmarks four ways of handling the *_switched fields over the same
# 50-record v5 datagram (parsed 1000 times each) and asserts their relative
# cost: Rational conversion + formatting in the test > the parser's default
# conversion > integer conversion without formatting > no conversion at all.
#
# Durations are taken from the monotonic clock so that wall-clock adjustments
# cannot distort them. The ordering assertions remain timing-sensitive.
test 'check performance degradation about stringifying *_switched times' do
  bench_data = v5_data(
    version: 5,
    flow_records: 50,
    uptime: DEFAULT_UPTIME,
    unix_sec: DEFAULT_TIME,
    unix_nsec: DEFAULT_NSEC,
    flow_seq_num: 1,
    engine_type: 1,
    engine_id: 1,
    sampling_algorithm: 0,
    sampling_interval: 0,
    records: Array.new(50) { v5_record }
  ).to_binary_s # 50 records

  # Parses bench_data 1000 times with +parser+, handing every record to the
  # optional block, checks that 50000 records were seen and returns the
  # elapsed seconds.
  measure = lambda do |parser, &per_record|
    count = 0
    GC.start
    started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    1000.times do
      parser.call(bench_data) do |_time, record|
        per_record.call(record) if per_record
        count += 1
      end
    end
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started

    assert{ count == 50000 }
    elapsed
  end

  uptime_parser = create_parser({"switched_times_from_uptime" => true})

  # configure to leave uptime-based value as-is
  uptime_based_switched = measure.call(uptime_parser)

  # make time conversion to use Rational
  using_rational = measure.call(uptime_parser) do |record|
    record["first_switched"] = format_for_switched(msec_from_boot_to_time_by_rational(record["first_switched"]))
    record["last_switched"] = format_for_switched(msec_from_boot_to_time_by_rational(record["last_switched"]))
  end

  # skip time formatting
  skip_time_formatting = measure.call(uptime_parser) do |record|
    record["first_switched"] = msec_from_boot_to_time(record["first_switched"])
    record["last_switched"] = msec_from_boot_to_time(record["last_switched"])
  end

  # with full time conversion (default)
  default_formatting = measure.call(create_parser)

  assert{ using_rational > default_formatting }
  assert{ default_formatting > skip_time_formatting }
  assert{ skip_time_formatting > uptime_based_switched }
end
194
+
195
# Builds a v5 PDU with one default record, parses it with a default parser and
# checks the event time, the header fields and the record fields. With the
# default configuration the *_switched values are expected as formatted UTC
# timestamps.
test 'parse v5 binary data contains 1 record, generated from definition' do
  pdu = v5_data(
    version: 5,
    flow_records: 1,
    uptime: DEFAULT_UPTIME,
    unix_sec: DEFAULT_TIME,
    unix_nsec: DEFAULT_NSEC,
    flow_seq_num: 1,
    engine_type: 1,
    engine_id: 1,
    sampling_algorithm: 0,
    sampling_interval: 0,
    records: [v5_record]
  )

  events = []
  create_parser.call(pdu.to_binary_s) { |time, record| events << [time, record] }

  assert_equal 1, events.size
  event_time, event = events.first
  assert_equal DEFAULT_TIME, event_time

  expected_fields = {
    # header
    "version" => 5,
    "flow_records" => 1,
    "flow_seq_num" => 1,
    "engine_type" => 1,
    "engine_id" => 1,
    "sampling_algorithm" => 0,
    "sampling_interval" => 0,

    # record
    "ipv4_src_addr" => "10.0.1.122",
    "ipv4_dst_addr" => "192.168.0.3",
    "ipv4_next_hop" => "10.0.0.3",
    "input_snmp" => 1,
    "output_snmp" => 2,
    "in_pkts" => 156,
    "in_bytes" => 1024,
    "first_switched" => format_for_switched(msec_from_boot_to_time(DEFAULT_UPTIME - 13000)),
    "last_switched" => format_for_switched(msec_from_boot_to_time(DEFAULT_UPTIME - 12950)),
    "l4_src_port" => 1048,
    "l4_dst_port" => 80,
    "tcp_flags" => 27,
    "protocol" => 6,
    "src_tos" => 0,
    "src_as" => 101,
    "dst_as" => 201,
    "src_mask" => 24,
    "dst_mask" => 24,
  }
  expected_fields.each do |field, value|
    assert_equal value, event[field]
  end
end
252
+
253
# Same scenario as the default one-record test, but with
# switched_times_from_uptime enabled: the *_switched fields are expected to
# stay as the raw uptime-relative millisecond values.
test 'parse v5 binary data contains 1 record, generated from definition, leaving switched times as using uptime' do
  pdu = v5_data(
    version: 5,
    flow_records: 1,
    uptime: DEFAULT_UPTIME,
    unix_sec: DEFAULT_TIME,
    unix_nsec: DEFAULT_NSEC,
    flow_seq_num: 1,
    engine_type: 1,
    engine_id: 1,
    sampling_algorithm: 0,
    sampling_interval: 0,
    records: [v5_record]
  )

  events = []
  uptime_parser = create_parser({"switched_times_from_uptime" => true})
  uptime_parser.call(pdu.to_binary_s) { |time, record| events << [time, record] }

  assert_equal 1, events.size
  event_time, event = events.first
  assert_equal DEFAULT_TIME, event_time

  expected_fields = {
    # header
    "version" => 5,
    "flow_records" => 1,
    "flow_seq_num" => 1,
    "engine_type" => 1,
    "engine_id" => 1,
    "sampling_algorithm" => 0,
    "sampling_interval" => 0,

    # record
    "ipv4_src_addr" => "10.0.1.122",
    "ipv4_dst_addr" => "192.168.0.3",
    "ipv4_next_hop" => "10.0.0.3",
    "input_snmp" => 1,
    "output_snmp" => 2,
    "in_pkts" => 156,
    "in_bytes" => 1024,
    "first_switched" => DEFAULT_UPTIME - 13000,
    "last_switched" => DEFAULT_UPTIME - 12950,
    "l4_src_port" => 1048,
    "l4_dst_port" => 80,
    "tcp_flags" => 27,
    "protocol" => 6,
    "src_tos" => 0,
    "src_as" => 101,
    "dst_as" => 201,
    "src_mask" => 24,
    "dst_mask" => 24,
  }
  expected_fields.each do |field, value|
    assert_equal value, event[field]
  end
end
310
+
311
+ require 'fluent/plugin/netflow_records'
312
# Wraps +v+ in a NetflowParser::IP4Addr: creates the object, loads the value
# with #set and returns the object.
def ipv4addr(v)
  Fluent::TextParser::NetflowParser::IP4Addr.new.tap { |addr| addr.set(v) }
end
317
+
318
# Wraps +v+ in a NetflowParser::IP6Addr: creates the object, loads the value
# with #set and returns the object.
def ipv6addr(v)
  Fluent::TextParser::NetflowParser::IP6Addr.new.tap { |addr| addr.set(v) }
end
323
+
324
# Wraps +v+ in a NetflowParser::MacAddr: creates the object, loads the value
# with #set and returns the object.
def macaddr(v)
  Fluent::TextParser::NetflowParser::MacAddr.new.tap { |addr| addr.set(v) }
end
329
+
330
# Wraps +v+ in a NetflowParser::MplsLabel: creates the object, loads the value
# with #set and returns the object.
def mplslabel(v)
  Fluent::TextParser::NetflowParser::MplsLabel.new.tap { |label| label.set(v) }
end
335
+
336
# Returns the attributes of one sample v5 flow record as a new Hash.
#
# @param hash [Hash] attributes that override (or extend) the defaults
# @return [Hash] the defaults merged with +hash+
def v5_record(hash={})
  defaults = {
    ipv4_src_addr: "10.0.1.122",
    ipv4_dst_addr: "192.168.0.3",
    ipv4_next_hop: "10.0.0.3",
    input_snmp: 1,
    output_snmp: 2,
    in_pkts: 156,
    in_bytes: 1024,
    first_switched: DEFAULT_UPTIME - 13000, # 13 seconds before export
    last_switched: DEFAULT_UPTIME - 12950,  # 50 msec after first_switched
    l4_src_port: 1048,
    l4_dst_port: 80,
    tcp_flags: 27,
    protocol: 6,
    src_tos: 0,
    src_as: 101,
    dst_as: 201,
    src_mask: 24,
    dst_mask: 24,
  }
  defaults.merge(hash)
end
358
+
359
# Builds a Netflow5PDU from +hash+. In every entry of hash[:records] the
# ipv4_src_addr, ipv4_dst_addr and ipv4_next_hop values, when present, are
# wrapped with #ipv4addr first. Works on copies, so neither +hash+ nor its
# record hashes are modified.
#
# @param hash [Hash] PDU attributes, optionally with a :records array
# @return the new Netflow5PDU
def v5_data(hash={})
  address_fields = [:ipv4_src_addr, :ipv4_dst_addr, :ipv4_next_hop]
  records = (hash[:records] || []).map do |record|
    address_fields.each_with_object(record.dup) do |field, copy|
      copy[field] = ipv4addr(copy[field]) if copy[field]
    end
  end
  Fluent::TextParser::NetflowParser::Netflow5PDU.new(hash.merge(records: records))
end
370
+
371
# Placeholder for a v9 template builder: not implemented, returns nil.
def v9_template(hash)
end

# Placeholder for a v9 option builder: not implemented, returns nil.
def v9_option(hash)
end

# Placeholder for a v9 data builder: not implemented, returns nil.
def v9_data(hash)
end
379
+ end