fluent-plugin-multiline-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,285 @@
1
+ require 'helper'
2
+ require_relative '../custom_parser'
3
+
4
# Exercises Fluent::ParserOutput with the parser formats that are not already
# covered by test_out_parser: none, apache, syslog, multiline (disabled) and
# kv_pair (presumably registered by ../custom_parser -- confirm there).
#
# Every test configures the output with the same four directives
# (remove_prefix test / add_prefix parsed / key_name message / format X),
# emits records tagged 'test.in' and inspects the driver's emits, each of
# which is read as [tag, time, record].
class ParserOutputParsersTest < Test::Unit::TestCase
  APACHE_COMMON_LOG =
    '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'.freeze
  APACHE_COMBINED_LOG =
    '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"'.freeze

  def setup
    Fluent::Test.setup
  end

  # Builds an output test driver around Fluent::ParserOutput.
  #
  # conf - configuration text handed to the driver's #configure
  # tag  - tag passed to the test driver ('test.in' throughout this file)
  def create_driver(conf, tag)
    Fluent::Test::OutputTestDriver.new(Fluent::ParserOutput, tag).configure(conf)
  end

  def test_regexp_parser
    # exists in test_out_parser
  end

  def test_json_parser
    # exists in test_out_parser
  end

  def test_tsv_parser
    # exists in test_out_parser
  end

  def test_ltsv_parser
    # exists in test_out_parser
  end

  def test_csv_parser
    # exists in test_out_parser
  end

  # format none: each message is passed through unchanged and keeps the
  # time it was emitted with.
  def test_none_parser
    d = create_driver(parser_conf('none'), 'test.in')
    time = Time.parse("2014-11-05 15:59:30").to_i
    messages = (1..4).map { |i| "aaaa bbbb cccc #{i}" }
    d.run do
      messages.each { |message| d.emit({"message" => message}, time) }
    end

    emits = d.emits
    assert_equal 4, emits.length

    emits.zip(messages).each do |(tag, emitted_at, record), message|
      assert_equal 'parsed.in', tag
      assert_equal time, emitted_at
      assert_equal message, record['message']
    end
  end

  # format apache: common and combined log lines; the event time is taken
  # from the log line, and referer/agent are nil for the common format.
  def test_apache_parser
    log_time = Time.parse("2000-10-10 13:55:36 -0700").to_i

    d = create_driver(parser_conf('apache'), 'test.in')
    time = Time.parse("2014-11-05 15:59:30").to_i
    d.run do
      d.emit({"message" => APACHE_COMMON_LOG}, time)
      d.emit({"message" => APACHE_COMBINED_LOG}, time)
    end

    emits = d.emits
    assert_equal 2, emits.length

    emits.each do |tag, parsed_time, record|
      assert_equal 'parsed.in', tag
      assert_equal log_time, parsed_time
      assert_apache_request record, '200', '2326'
    end

    common, combined = emits.map { |emit| emit[2] }
    assert_nil common['referer']
    assert_nil common['agent']
    assert_equal 'http://www.example.com/start.html', combined['referer']
    assert_equal 'Mozilla/4.08 [en] (Win98; I ;Nav)', combined['agent']
  end

  # format apache plus `types`: code and size come back as Integers
  # instead of Strings.
  def test_apache_parser_with_types
    log_time = Time.parse("2000-10-10 13:55:36 -0700").to_i

    d = create_driver(parser_conf('apache', 'types code:integer,size:integer'), 'test.in')
    time = Time.parse("2014-11-05 15:59:30").to_i
    d.run do
      d.emit({"message" => APACHE_COMBINED_LOG}, time)
    end

    emits = d.emits
    assert_equal 1, emits.length

    tag, parsed_time, record = emits[0]
    assert_equal 'parsed.in', tag
    assert_equal log_time, parsed_time
    assert_apache_request record, 200, 2326
    assert_equal 'http://www.example.com/start.html', record['referer']
    assert_equal 'Mozilla/4.08 [en] (Win98; I ;Nav)', record['agent']
  end

  # format syslog: host, ident, pid and message are split out, and the
  # event time comes from the log line.
  def test_syslog_parser
    logs = [
      'Nov 5 16:19:48 myhost.local netbiosd[50]: name servers down?',
      'Nov 5 16:21:20 myhost.local coreaudiod[320]: Disabled automatic stack shots because audio IO is active',
      'Nov 5 16:21:20 myhost.local coreaudiod[320]: Enabled automatic stack shots because audio IO is inactive',
    ]
    # [timestamp, ident, pid, message] expected for each log line above.
    expected = [
      ["11/05 16:19:48", 'netbiosd', '50', 'name servers down?'],
      ["11/05 16:21:20", 'coreaudiod', '320', 'Disabled automatic stack shots because audio IO is active'],
      ["11/05 16:21:20", 'coreaudiod', '320', 'Enabled automatic stack shots because audio IO is inactive'],
    ]

    d = create_driver(parser_conf('syslog'), 'test.in')
    time = Time.parse("11/05 15:59:30").to_i # time is assumed as current year
    d.run do
      logs.each { |log| d.emit({"message" => log}, time) }
    end

    emits = d.emits
    assert_equal 3, emits.length

    emits.zip(expected).each do |(tag, parsed_time, record), (timestamp, ident, pid, message)|
      assert_equal 'parsed.in', tag
      assert_equal Time.parse(timestamp).to_i, parsed_time
      assert_equal 'myhost.local', record['host']
      assert_equal ident, record['ident']
      assert_equal pid, record['pid']
      assert_equal message, record['message']
    end
  end

  # Disabled: the x_ prefix keeps test-unit from collecting this method.
  # The original author noted the format could not be configured well.
  #
  # Two patterns could never match the sample logs and are corrected here:
  # * format1 had a stray "/" after the day field, so it required
  #   "2014/11/05/ 16:33:01" rather than "2014/11/05 16:33:01";
  # * format4 captured [^ ]*, which stops at the first space and so could
  #   not produce the asserted 'first line' / 'second line'.
  # NOTE(review): the corrected patterns have not been run against fluentd;
  # confirm the test passes before renaming it to test_multiline_parser.
  def x_test_multiline_parser
    log1 = "*** 2014/11/05 16:33:01 -0700\nhost: myhost\nport: 2048\nmessage: first line\n"
    log2 = "*** 2014/11/05 16:33:02 +0900\nhost: myhost\nport: 2049\nmessage: second line\n"
    # Header only, no host/port/message lines: expected not to be emitted.
    log3 = "*** 2014/11/05 16:43:11 +1100\n"

    conf = parser_conf(
      'multiline',
      'time_format %Y/%m/%d %H:%M:%S %z',
      'format_firstline /^\*\*\* /',
      'format1 /\*\*\* (?<time>\d{4}/\d\d/\d\d \d\d:\d\d:\d\d [-+]\d{4})/',
      'format2 /\s*host: (?<host>[^\s]+)/',
      'format3 /\s*port: (?<port>\d+)/',
      'format4 /\s*message: (?<message>[^\n]*)/'
    )
    d = create_driver(conf, 'test.in')
    time = Time.parse("2014-11-05 15:59:30").to_i
    d.run do
      [log1, log2, log3].each { |log| d.emit({"message" => log}, time) }
    end

    emits = d.emits
    assert_equal 2, emits.length

    expected = [
      ["2014-11-05 16:33:01 -0700", '2048', 'first line'],
      ["2014-11-05 16:33:02 +0900", '2049', 'second line'],
    ]
    emits.zip(expected).each do |(tag, parsed_time, record), (timestamp, port, message)|
      assert_equal 'parsed.in', tag
      assert_equal Time.parse(timestamp).to_i, parsed_time
      assert_equal 'myhost', record['host']
      assert_equal port, record['port']
      assert_equal message, record['message']
    end
  end

  # format kv_pair: a time field in the message overrides the emit time,
  # a missing one keeps it, and an unparsable one drops the event.
  def test_custom_parser
    conf = parser_conf(
      'kv_pair',
      'time_format %Y-%m-%d %H:%M:%S %z',
      'delim1 :',
      'delim2 ,'
    )
    d = create_driver(conf, 'test.in')
    time = Time.parse("2014-11-05 15:59:30").to_i
    d.run do
      d.emit({"message" => "k1:v1,k2:v2,k3:1,time:2014-11-05 00:00:00 +0000"}, time)
      d.emit({"message" => "k1:v1,k2:v2,k3:2"}, time) # original time is used
      d.emit({"message" => "k1:v1,k2:v2,k3:3,time:2014-11-05 00:00:00"}, time) # time parse error -> not emitted
    end

    emits = d.emits
    assert_equal 2, emits.length

    # [expected event time, expected k3] for the two surviving events.
    expected = [
      [Time.parse("2014-11-05 00:00:00 +0000").to_i, '1'],
      [Time.parse("2014-11-05 15:59:30").to_i, '2'],
    ]
    emits.zip(expected).each do |(tag, parsed_time, record), (expected_time, k3)|
      assert_equal 'parsed.in', tag
      assert_equal expected_time, parsed_time
      assert_equal 'v1', record['k1']
      assert_equal 'v2', record['k2']
      assert_equal k3, record['k3']
    end
  end

  private

  # Returns the configuration text shared by every test in this file,
  # followed by any format-specific directives, one directive per line.
  #
  # format      - value for the `format` directive
  # extra_lines - further directives, appended in the order given
  def parser_conf(format, *extra_lines)
    base = ['remove_prefix test', 'add_prefix parsed', 'key_name message', "format #{format}"]
    (base + extra_lines).map { |line| "#{line}\n" }.join
  end

  # Asserts the request fields common to both apache sample log lines.
  # code and size are parameters because they are Strings by default and
  # Integers when the `types` directive is configured.
  def assert_apache_request(record, code, size)
    assert_equal '127.0.0.1', record['host']
    assert_equal 'frank', record['user']
    assert_equal 'GET', record['method']
    assert_equal '/apache_pb.gif', record['path']
    assert_equal code, record['code']
    assert_equal size, record['size']
  end
end
metadata ADDED
@@ -0,0 +1,125 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-multiline-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jerry Zhou
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-03-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: test-unit
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: thread_safe
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: fluentd
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.12.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 0.12.0
69
+ description: fluentd plugin to parse single field, or to combine log structure into
70
+ single field, and support multiline format
71
+ email:
72
+ - quicksort@outlook.com
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - .gitignore
78
+ - Gemfile
79
+ - LICENSE
80
+ - README.md
81
+ - Rakefile
82
+ - fluent-plugin-multiline-parser.gemspec
83
+ - lib/fluent/plugin/filter_deparser.rb
84
+ - lib/fluent/plugin/filter_parser.rb
85
+ - lib/fluent/plugin/out_deparser.rb
86
+ - lib/fluent/plugin/out_parser.rb
87
+ - test/custom_parser.rb
88
+ - test/helper.rb
89
+ - test/plugin/test_deparser.rb
90
+ - test/plugin/test_filter_deparser.rb
91
+ - test/plugin/test_filter_parser.rb
92
+ - test/plugin/test_out_parser.rb
93
+ - test/plugin/test_out_parser_for_parsers.rb
94
+ homepage: https://github.com/quick-sort/fluent-plugin-multiline-parser
95
+ licenses:
96
+ - Apache-2.0
97
+ metadata: {}
98
+ post_install_message:
99
+ rdoc_options: []
100
+ require_paths:
101
+ - lib
102
+ required_ruby_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - '>='
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ required_rubygems_version: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - '>='
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ requirements: []
113
+ rubyforge_project:
114
+ rubygems_version: 2.0.14.1
115
+ signing_key:
116
+ specification_version: 4
117
+ summary: plugin to parse/combine multiline fluentd log messages
118
+ test_files:
119
+ - test/custom_parser.rb
120
+ - test/helper.rb
121
+ - test/plugin/test_deparser.rb
122
+ - test/plugin/test_filter_deparser.rb
123
+ - test/plugin/test_filter_parser.rb
124
+ - test/plugin/test_out_parser.rb
125
+ - test/plugin/test_out_parser_for_parsers.rb