fluent-plugin-parser 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cd40a834ddb84cf70b817ca1eeb890549786afdf
4
- data.tar.gz: ec978eccafdcbde7569284036ef907b53cd0f2b9
3
+ metadata.gz: e389a96d9ae681aa25c0de95d9d1abb9b1c4189b
4
+ data.tar.gz: 3808413fd2cdd85483eeca9d01cdaed62e4fe1de
5
5
  SHA512:
6
- metadata.gz: 5b8098ed0973e33d060dcdc6e07251d462cb70d79795b9abf1b294c89e7db4c9ac48ac5f2df11fa4df7c5fc521c6f6443aeae355cd5255bdd405be435430ceb2
7
- data.tar.gz: a055940d6a07d97ba4e5438af542fb9cdf400f46115ce568eccdcb23497c3fcbf19bade3fae1a2e93e55f9c5f60d6073f2ea872a9c8947c3ca2642cce663d301
6
+ metadata.gz: 7174e622a987623dced94a830681e5d6f31742e2e2205964c709f9cccebca4a8a0b89b1ad082ded4cc9a8aedc8b71fe74e38af360bf33522476845639817d7c5
7
+ data.tar.gz: d6ea265808450c244ae34cc05efc62ef2e64a48b3b0c92327db373c49383d4d852b3ceb485dcf426d8200efc993fbf7ce3691ae8691f810296e85b19767781ef
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |gem|
3
3
  gem.name = "fluent-plugin-parser"
4
- gem.version = "0.5.0"
4
+ gem.version = "0.6.0"
5
5
  gem.authors = ["TAGOMORI Satoshi"]
6
6
  gem.email = ["tagomoris@gmail.com"]
7
7
  gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
@@ -0,0 +1,35 @@
1
# Filter plugin that builds a single string field from several record fields.
#
# The values of the fields listed in +format_key_names+ are substituted, in
# order, into the +format+ template (String#% syntax) and the result is stored
# under +key_name+.
class Fluent::DeparserFilter < Fluent::Filter
  Fluent::Plugin.register_filter('deparser', self)

  config_param :format, :string
  config_param :format_key_names, :string
  config_param :key_name, :string, default: 'message'
  config_param :reserve_data, :bool, default: false

  # Splits +format_key_names+ on commas and checks that the template can be
  # filled with that many values.
  #
  # Raises Fluent::ConfigError when String#% rejects the argument count.
  def configure(conf)
    super

    @format_key_names = @format_key_names.split(',')
    begin
      # Dry run with placeholder values; only the ArgumentError matters here.
      @format % (["x"] * @format_key_names.length)
    rescue ArgumentError
      raise Fluent::ConfigError, "mismatch between placeholder of format and format_key_names"
    end
  end

  # Returns a new event stream in which every record carries the formatted
  # string under +key_name+. With +reserve_data+ the original fields are kept
  # (the formatted field wins on a key clash); otherwise the record contains
  # only the formatted field.
  def filter_stream(tag, es)
    new_es = Fluent::MultiEventStream.new
    es.each do |time, record|
      deparsed = {@key_name => (@format % @format_key_names.map { |k| record[k] })}
      new_es.add(time, @reserve_data ? record.merge(deparsed) : deparsed)
    end
    new_es
  end
end
@@ -0,0 +1,105 @@
1
+ require 'fluent/parser'
2
+
3
# Filter plugin that parses the string stored under +key_name+ with a
# Fluent::TextParser and emits the parsed values as the record.
class Fluent::ParserFilter < Fluent::Filter
  Fluent::Plugin.register_filter('parser', self)

  config_param :key_name, :string
  config_param :reserve_data, :bool, default: false
  config_param :inject_key_prefix, :string, default: nil
  config_param :replace_invalid_sequence, :bool, default: false
  config_param :hash_value_field, :string, default: nil
  config_param :suppress_parse_error_log, :bool, default: false
  config_param :time_parse, :bool, default: true

  attr_reader :parser

  def initialize
    super
    require 'time'
  end

  # Builds and configures the text parser from the same configuration element.
  # When +time_parse+ is off and the concrete parser exposes +time_key=+, the
  # time key is cleared so the parser leaves the time field alone.
  def configure(conf)
    super

    @parser = Fluent::TextParser.new
    @parser.estimate_current_event = false
    @parser.configure(conf)
    if !@time_parse && @parser.parser.respond_to?(:time_key=)
      # disable parse time
      @parser.parser.time_key = nil
    end

    self
  end

  # Parses +record[key_name]+ of every event and returns a new event stream.
  #
  # Parser errors and other StandardErrors are logged as warnings (unless
  # +suppress_parse_error_log+ is set) and the event is skipped. ArgumentError
  # is re-raised, except for "invalid byte sequence" errors when
  # +replace_invalid_sequence+ is enabled; those are retried on a sanitized
  # copy of the value.
  def filter_stream(tag, es)
    new_es = Fluent::MultiEventStream.new
    es.each do |time, record|
      raw_value = record[@key_name]
      begin
        parse_with_fallback(new_es, tag, time, record, raw_value)
      rescue Fluent::TextParser::ParserError => e
        log.warn e.message unless @suppress_parse_error_log
      rescue ArgumentError
        # Must come before the generic rescue so ArgumentError keeps propagating.
        raise
      rescue => e
        log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
      end
    end
    new_es
  end

  private

  # Parses +raw_value+; on an "invalid byte sequence" ArgumentError with
  # +replace_invalid_sequence+ enabled, retries once on a sanitized copy.
  # The retry runs in this method rather than inside the caller's rescue
  # clause, so errors it raises go through the caller's normal handlers.
  def parse_with_fallback(new_es, tag, time, record, raw_value)
    parse_and_add(new_es, tag, time, record, raw_value, raw_value)
  rescue ArgumentError => e
    raise unless @replace_invalid_sequence && e.message.start_with?("invalid byte sequence in")
    parse_and_add(new_es, tag, time, record, replace_invalid_byte(raw_value), raw_value)
  end

  # Runs the parser over +text+ and appends each result to +new_es+.
  # +raw_value+ is the original, unsanitized value used in the warning.
  # When the parser yields no values, the event is dropped unless
  # +reserve_data+ is set, in which case the original record is kept.
  def parse_and_add(new_es, tag, time, record, text, raw_value)
    @parser.parse(text) do |t, values|
      if values
        t ||= time
        new_es.add(t, handle_parsed(tag, record, t, values))
      else
        log.warn "pattern not match with data '#{raw_value}'" unless @suppress_parse_error_log
        new_es.add(time, handle_parsed(tag, record, time, {})) if @reserve_data
      end
    end
  end

  # Builds the output record from the parsed +values+: applies
  # +inject_key_prefix+ to the keys, nests the values under
  # +hash_value_field+ if configured, and merges them into the original
  # record when +reserve_data+ is set.
  def handle_parsed(tag, record, t, values)
    if values && @inject_key_prefix
      values = Hash[values.map { |k, v| [@inject_key_prefix + k, v] }]
    end
    r = @hash_value_field ? {@hash_value_field => values} : values
    if @reserve_data
      r = r ? record.merge(r) : record
    end
    r
  end

  # Returns a copy of +string+ in its original encoding with invalid or
  # undefined byte sequences replaced by '?'. The string is round-tripped
  # through a different encoding (UTF-16BE for UTF-8 input, UTF-8 otherwise).
  def replace_invalid_byte(string)
    replace_options = { invalid: :replace, undef: :replace, replace: '?' }
    original_encoding = string.encoding
    temporal_encoding = (original_encoding == Encoding::UTF_8 ? Encoding::UTF_16BE : Encoding::UTF_8)
    string.encode(temporal_encoding, original_encoding, replace_options).encode(original_encoding)
  end
end
data/test/helper.rb CHANGED
@@ -24,6 +24,8 @@ end
24
24
 
25
25
  require 'fluent/plugin/out_parser'
26
26
  require 'fluent/plugin/out_deparser'
27
+ require 'fluent/plugin/filter_parser'
28
+ require 'fluent/plugin/filter_deparser'
27
29
 
28
30
  class Test::Unit::TestCase
29
31
  end
@@ -0,0 +1,82 @@
1
+ require 'helper'
2
+
3
+ class DeparserFilterTest < Test::Unit::TestCase
4
+ def setup
5
+ Fluent::Test.setup
6
+ end
7
+
8
+ CONFIG = %[
9
+ format %s: %s %s %s
10
+ format_key_names host,path,status,size
11
+ key_name fulltext
12
+ reserve_data true
13
+ ]
14
+
15
+ def create_driver(conf=CONFIG,tag='test')
16
+ Fluent::Test::FilterTestDriver.new(Fluent::DeparserFilter, tag).configure(conf)
17
+ end
18
+
19
+ def test_configure
20
+ assert_raise(Fluent::ConfigError) {
21
+ d = create_driver('')
22
+ }
23
+
24
+ d = create_driver %[
25
+ format %s: %s %s
26
+ format_key_names x,y,z
27
+ ]
28
+ assert_equal '%s: %s %s', d.instance.format
29
+ assert_equal ['x','y','z'], d.instance.format_key_names
30
+ assert_equal 'message', d.instance.key_name
31
+ assert_equal false, d.instance.reserve_data
32
+ end
33
+
34
+ # CONFIG = %[
35
+ # format %s: %s %s %s
36
+ # format_key_names host,path,status,size
37
+ # key_name fulltext
38
+ # reserve_data true
39
+ # ]
40
+ def test_filter
41
+ d1 = create_driver(CONFIG, 'test.no.change')
42
+ time = Time.parse("2012-01-02 13:14:15").to_i
43
+ d1.run do
44
+ d1.filter({'host'=>'xxx.local','path'=>'/f/1','status'=>'200','size'=>300}, time)
45
+ d1.filter({'host'=>'yyy.local','path'=>'/f/2','status'=>'302','size'=>512}, time)
46
+ end
47
+ filtered = d1.filtered_as_array
48
+ assert_equal 2, filtered.length
49
+ first = filtered[0]
50
+ assert_equal 'test.no.change', first[0]
51
+ assert_equal time, first[1]
52
+ assert_equal 'xxx.local: /f/1 200 300', first[2]['fulltext']
53
+ assert_equal ['fulltext','host','path','size','status'], first[2].keys.sort
54
+ second = filtered[1]
55
+ assert_equal 'test.no.change', second[0]
56
+ assert_equal time, second[1]
57
+ assert_equal 'yyy.local: /f/2 302 512', second[2]['fulltext']
58
+ assert_equal ['fulltext','host','path','size','status'], second[2].keys.sort
59
+
60
+ d2 = create_driver(%[
61
+ format %s: %s %s
62
+ format_key_names host,path,status
63
+ ], 'test.no.change')
64
+ time = Time.parse("2012-01-02 13:14:15").to_i
65
+ d2.run do
66
+ d2.filter({'host'=>'xxx.local','path'=>'/f/1','status'=>'200','size'=>300}, time)
67
+ d2.filter({'host'=>'yyy.local','path'=>'/f/2','status'=>'302','size'=>512}, time)
68
+ end
69
+ filtered = d2.filtered_as_array
70
+ assert_equal 2, filtered.length
71
+ first = filtered[0]
72
+ assert_equal 'test.no.change', first[0]
73
+ assert_equal time, first[1]
74
+ assert_equal 'xxx.local: /f/1 200', first[2]['message']
75
+ assert_equal ['message'], first[2].keys.sort
76
+ second = filtered[1]
77
+ assert_equal 'test.no.change', second[0]
78
+ assert_equal time, second[1]
79
+ assert_equal 'yyy.local: /f/2 302', second[2]['message']
80
+ assert_equal ['message'], second[2].keys.sort
81
+ end
82
+ end
@@ -0,0 +1,668 @@
1
+ require 'helper'
2
+
3
+ class ParserFilterTest < Test::Unit::TestCase
4
+ def setup
5
+ Fluent::Test.setup
6
+ end
7
+
8
+ CONFIG = %[
9
+ key_name message
10
+ format /^(?<x>.)(?<y>.) (?<time>.+)$/
11
+ time_format %Y%m%d%H%M%S
12
+ reserve_data true
13
+ ]
14
+
15
+ def create_driver(conf=CONFIG,tag='test')
16
+ Fluent::Test::FilterTestDriver.new(Fluent::ParserFilter, tag).configure(conf)
17
+ end
18
+
19
+ def test_configure
20
+ assert_raise(Fluent::ConfigError) {
21
+ d = create_driver('')
22
+ }
23
+ assert_raise(Fluent::ConfigError) {
24
+ d = create_driver %[
25
+ format unknown_format_that_will_never_be_implemented
26
+ key_name foo
27
+ ]
28
+ }
29
+ assert_nothing_raised {
30
+ d = create_driver %[
31
+ format /(?<x>.)/
32
+ key_name foo
33
+ ]
34
+ }
35
+ assert_nothing_raised {
36
+ d = create_driver %[
37
+ format /(?<x>.)/
38
+ key_name foo
39
+ ]
40
+ }
41
+ assert_nothing_raised {
42
+ d = create_driver %[
43
+ format /(?<x>.)/
44
+ key_name foo
45
+ ]
46
+ }
47
+ assert_nothing_raised {
48
+ d = create_driver %[
49
+ format /(?<x>.)/
50
+ key_name foo
51
+ ]
52
+ }
53
+ assert_nothing_raised {
54
+ d = create_driver %[
55
+ format json
56
+ key_name foo
57
+ ]
58
+ }
59
+ assert_nothing_raised {
60
+ d = create_driver %[
61
+ format ltsv
62
+ key_name foo
63
+ ]
64
+ }
65
+ assert_nothing_raised {
66
+ d = create_driver %[
67
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
68
+ key_name message
69
+ suppress_parse_error_log true
70
+ ]
71
+ }
72
+ assert_nothing_raised {
73
+ d = create_driver %[
74
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
75
+ key_name message
76
+ suppress_parse_error_log false
77
+ ]
78
+ }
79
+ d = create_driver %[
80
+ key_name foo
81
+ format /(?<x>.)/
82
+ ]
83
+ assert_equal false, d.instance.reserve_data
84
+ end
85
+
86
+ # CONFIG = %[
87
+ # remove_prefix test
88
+ # add_prefix parsed
89
+ # key_name message
90
+ # format /^(?<x>.)(?<y>.) (?<time>.+)$/
91
+ # time_format %Y%m%d%H%M%S
92
+ # reserve_data true
93
+ # ]
94
+ def test_filter
95
+ d1 = create_driver(CONFIG, 'test.no.change')
96
+ time = Time.parse("2012-01-02 13:14:15").to_i
97
+ d1.run do
98
+ d1.filter({'message' => '12 20120402182059'}, time)
99
+ d1.filter({'message' => '34 20120402182100'}, time)
100
+ d1.filter({'message' => '56 20120402182100'}, time)
101
+ d1.filter({'message' => '78 20120402182101'}, time)
102
+ d1.filter({'message' => '90 20120402182100'}, time)
103
+ end
104
+ filtered = d1.filtered_as_array
105
+ assert_equal 5, filtered.length
106
+
107
+ first = filtered[0]
108
+ assert_equal 'test.no.change', first[0]
109
+ assert_equal Time.parse("2012-04-02 18:20:59").to_i, first[1]
110
+ assert_equal '1', first[2]['x']
111
+ assert_equal '2', first[2]['y']
112
+ assert_equal '12 20120402182059', first[2]['message']
113
+
114
+ second = filtered[1]
115
+ assert_equal 'test.no.change', second[0]
116
+ assert_equal Time.parse("2012-04-02 18:21:00").to_i, second[1]
117
+ assert_equal '3', second[2]['x']
118
+ assert_equal '4', second[2]['y']
119
+
120
+ third = filtered[2]
121
+ assert_equal 'test.no.change', third[0]
122
+ assert_equal Time.parse("2012-04-02 18:21:00").to_i, third[1]
123
+ assert_equal '5', third[2]['x']
124
+ assert_equal '6', third[2]['y']
125
+
126
+ fourth = filtered[3]
127
+ assert_equal 'test.no.change', fourth[0]
128
+ assert_equal Time.parse("2012-04-02 18:21:01").to_i, fourth[1]
129
+ assert_equal '7', fourth[2]['x']
130
+ assert_equal '8', fourth[2]['y']
131
+
132
+ fifth = filtered[4]
133
+ assert_equal 'test.no.change', fifth[0]
134
+ assert_equal Time.parse("2012-04-02 18:21:00").to_i, fifth[1]
135
+ assert_equal '9', fifth[2]['x']
136
+ assert_equal '0', fifth[2]['y']
137
+
138
+ d2 = create_driver(%[
139
+ tag parsed
140
+ key_name data
141
+ format /^(?<x>.)(?<y>.) (?<t>.+)$/
142
+ ], 'test.no.change')
143
+ time = Time.parse("2012-04-02 18:20:59").to_i
144
+ d2.run do
145
+ d2.filter({'data' => '12 20120402182059'}, time)
146
+ d2.filter({'data' => '34 20120402182100'}, time)
147
+ end
148
+ filtered = d2.filtered_as_array
149
+ assert_equal 2, filtered.length
150
+
151
+ first = filtered[0]
152
+ assert_equal 'test.no.change', first[0]
153
+ assert_equal time, first[1]
154
+ assert_nil first[2]['data']
155
+ assert_equal '1', first[2]['x']
156
+ assert_equal '2', first[2]['y']
157
+ assert_equal '20120402182059', first[2]['t']
158
+
159
+ second = filtered[1]
160
+ assert_equal 'test.no.change', second[0]
161
+ assert_equal time, second[1]
162
+ assert_nil second[2]['data']
163
+ assert_equal '3', second[2]['x']
164
+ assert_equal '4', second[2]['y']
165
+ assert_equal '20120402182100', second[2]['t']
166
+
167
+ d3 = create_driver(%[
168
+ tag parsed
169
+ key_name data
170
+ format /^(?<x>[0-9])(?<y>[0-9]) (?<t>.+)$/
171
+ ], 'test.no.change')
172
+ time = Time.parse("2012-04-02 18:20:59").to_i
173
+ d3.run do
174
+ d3.filter({'data' => '12 20120402182059'}, time)
175
+ d3.filter({'data' => '34 20120402182100'}, time)
176
+ d3.filter({'data' => 'xy 20120402182101'}, time)
177
+ end
178
+ filtered = d3.filtered_as_array
179
+ assert_equal 2, filtered.length
180
+
181
+ d3x = create_driver(%[
182
+ tag parsed
183
+ key_name data
184
+ format /^(?<x>\\d)(?<y>\\d) (?<t>.+)$/
185
+ reserve_data yes
186
+ ], 'test.no.change')
187
+ time = Time.parse("2012-04-02 18:20:59").to_i
188
+ d3x.run do
189
+ d3x.filter({'data' => '12 20120402182059'}, time)
190
+ d3x.filter({'data' => '34 20120402182100'}, time)
191
+ d3x.filter({'data' => 'xy 20120402182101'}, time)
192
+ end
193
+ filtered = d3x.filtered_as_array
194
+ assert_equal 3, filtered.length
195
+
196
+ d4 = create_driver(%[
197
+ tag parsed
198
+ key_name data
199
+ format json
200
+ ], 'test.no.change')
201
+ time = Time.parse("2012-04-02 18:20:59").to_i
202
+ d4.run do
203
+ d4.filter({'data' => '{"xxx":"first","yyy":"second"}', 'xxx' => 'x', 'yyy' => 'y'}, time)
204
+ d4.filter({'data' => 'foobar', 'xxx' => 'x', 'yyy' => 'y'}, time)
205
+ end
206
+ filtered = d4.filtered_as_array
207
+ assert_equal 1, filtered.length
208
+
209
+ d4x = create_driver(%[
210
+ tag parsed
211
+ key_name data
212
+ format json
213
+ reserve_data yes
214
+ ], 'test.no.change')
215
+ time = Time.parse("2012-04-02 18:20:59").to_i
216
+ d4x.run do
217
+ d4x.filter({'data' => '{"xxx":"first","yyy":"second"}', 'xxx' => 'x', 'yyy' => 'y'}, time)
218
+ d4x.filter({'data' => 'foobar', 'xxx' => 'x', 'yyy' => 'y'}, time)
219
+ end
220
+ filtered = d4x.filtered_as_array
221
+ assert_equal 2, filtered.length
222
+
223
+ first = filtered[0]
224
+ assert_equal 'test.no.change', first[0]
225
+ assert_equal time, first[1]
226
+ assert_equal '{"xxx":"first","yyy":"second"}', first[2]['data']
227
+ assert_equal 'first', first[2]['xxx']
228
+ assert_equal 'second', first[2]['yyy']
229
+
230
+ second = filtered[1]
231
+ assert_equal 'test.no.change', second[0]
232
+ assert_equal time, second[1]
233
+ assert_equal 'foobar', second[2]['data']
234
+ assert_equal 'x', second[2]['xxx']
235
+ assert_equal 'y', second[2]['yyy']
236
+ end
237
+
238
+ CONFIG_LTSV = %[
239
+ format ltsv
240
+ key_name data
241
+ ]
242
# Exercises the LTSV format in three setups: plain parsing (the source field
# is dropped), parsing with reserve_data (the source field is kept), and
# parsing with a `types` declaration.
def test_filter_ltsv
  d = create_driver(CONFIG_LTSV, 'test.no.change')
  time = Time.parse("2012-04-02 18:20:59").to_i
  d.run do
    d.filter({'data' => "xxx:first\tyyy:second", 'xxx' => 'x', 'yyy' => 'y'}, time)
    d.filter({'data' => "xxx:first\tyyy:second2", 'xxx' => 'x', 'yyy' => 'y'}, time)
  end
  filtered = d.filtered_as_array
  assert_equal 2, filtered.length

  first = filtered[0]
  assert_equal 'test.no.change', first[0]
  assert_equal time, first[1]
  assert_nil first[2]['data']
  assert_equal 'first', first[2]['xxx']
  assert_equal 'second', first[2]['yyy']

  second = filtered[1]
  assert_equal 'test.no.change', second[0]
  assert_equal time, second[1]
  # Check the second record itself, not the first one again.
  assert_nil second[2]['data']
  assert_equal 'first', second[2]['xxx']
  assert_equal 'second2', second[2]['yyy']

  d = create_driver(CONFIG_LTSV + %[
    reserve_data yes
  ], 'test.no.change')
  time = Time.parse("2012-04-02 18:20:59").to_i
  d.run do
    d.filter({'data' => "xxx:first\tyyy:second", 'xxx' => 'x', 'yyy' => 'y'}, time)
    d.filter({'data' => "xxx:first\tyyy:second2", 'xxx' => 'x', 'yyy' => 'y'}, time)
  end
  filtered = d.filtered_as_array
  assert_equal 2, filtered.length

  first = filtered[0]
  assert_equal 'test.no.change', first[0]
  assert_equal time, first[1]
  assert_equal "xxx:first\tyyy:second", first[2]['data']
  assert_equal 'first', first[2]['xxx']
  assert_equal 'second', first[2]['yyy']

  second = filtered[1]
  assert_equal 'test.no.change', second[0]
  assert_equal time, second[1]
  # With reserve_data the second record keeps its own source string.
  assert_equal "xxx:first\tyyy:second2", second[2]['data']
  assert_equal 'first', second[2]['xxx']
  assert_equal 'second2', second[2]['yyy']

  # convert types
  d = create_driver(CONFIG_LTSV + %[
    types i:integer,s:string,f:float,b:bool
  ], 'test.no.change')
  time = Time.parse("2012-04-02 18:20:59").to_i
  d.run do
    d.filter({'data' => "i:1\ts:2\tf:3\tb:true\tx:123"}, time)
  end
  filtered = d.filtered_as_array
  assert_equal 1, filtered.length

  first = filtered[0]
  assert_equal 'test.no.change', first[0]
  assert_equal time, first[1]
  assert_equal 1, first[2]['i']
  assert_equal '2', first[2]['s']
  assert_equal 3.0, first[2]['f']
  assert_equal true, first[2]['b']
  assert_equal '123', first[2]['x']
end
311
+
312
+ CONFIG_TSV = %[
313
+ format tsv
314
+ key_name data
315
+ keys key1,key2,key3
316
+ ]
317
+ def test_filter_tsv
318
+ d = create_driver(CONFIG_TSV, 'test.no.change')
319
+ time = Time.parse("2012-04-02 18:20:59").to_i
320
+ d.run do
321
+ d.filter({'data' => "value1\tvalue2\tvalueThree", 'xxx' => 'x', 'yyy' => 'y'}, time)
322
+ end
323
+ filtered = d.filtered_as_array
324
+ assert_equal 1, filtered.length
325
+
326
+ first = filtered[0]
327
+ assert_equal 'test.no.change', first[0]
328
+ assert_equal time, first[1]
329
+ assert_nil first[2]['data']
330
+ assert_equal 'value1', first[2]['key1']
331
+ assert_equal 'value2', first[2]['key2']
332
+ assert_equal 'valueThree', first[2]['key3']
333
+ end
334
+
335
+ CONFIG_CSV = %[
336
+ format csv
337
+ key_name data
338
+ keys key1,key2,key3
339
+ ]
340
+ def test_filter_csv
341
+ d = create_driver(CONFIG_CSV, 'test.no.change')
342
+ time = Time.parse("2012-04-02 18:20:59").to_i
343
+ d.run do
344
+ d.filter({'data' => 'value1,"value2","value""ThreeYes!"', 'xxx' => 'x', 'yyy' => 'y'}, time)
345
+ end
346
+ filtered = d.filtered_as_array
347
+ assert_equal 1, filtered.length
348
+
349
+ first = filtered[0]
350
+ assert_equal 'test.no.change', first[0]
351
+ assert_equal time, first[1]
352
+ assert_nil first[2]['data']
353
+ assert_equal 'value1', first[2]['key1']
354
+ assert_equal 'value2', first[2]['key2']
355
+ assert_equal 'value"ThreeYes!', first[2]['key3']
356
+ end
357
+
358
+ CONFIG_HASH_VALUE_FIELD = %[
359
+ format json
360
+ key_name data
361
+ hash_value_field parsed
362
+ ]
363
+ CONFIG_HASH_VALUE_FIELD_RESERVE_DATA = %[
364
+ format json
365
+ key_name data
366
+ reserve_data yes
367
+ hash_value_field parsed
368
+ ]
369
+ CONFIG_HASH_VALUE_FIELD_WITH_INJECT_KEY_PREFIX = %[
370
+ format json
371
+ key_name data
372
+ hash_value_field parsed
373
+ inject_key_prefix data.
374
+ ]
375
+ def test_filter_inject_hash_value_field
376
+ original = {'data' => '{"xxx":"first","yyy":"second"}', 'xxx' => 'x', 'yyy' => 'y'}
377
+
378
+ d = create_driver(CONFIG_HASH_VALUE_FIELD, 'test.no.change')
379
+ time = Time.parse("2012-04-02 18:20:59").to_i
380
+ d.run do
381
+ d.filter(original, time)
382
+ end
383
+ filtered = d.filtered_as_array
384
+ assert_equal 1, filtered.length
385
+
386
+ first = filtered[0]
387
+ assert_equal 'test.no.change', first[0]
388
+ assert_equal time, first[1]
389
+
390
+ record = first[2]
391
+ assert_equal 1, record.keys.size
392
+ assert_equal({"xxx"=>"first","yyy"=>"second"}, record['parsed'])
393
+
394
+ d = create_driver(CONFIG_HASH_VALUE_FIELD_RESERVE_DATA, 'test.no.change')
395
+ time = Time.parse("2012-04-02 18:20:59").to_i
396
+ d.run do
397
+ d.filter(original, time)
398
+ end
399
+ filtered = d.filtered_as_array
400
+ assert_equal 1, filtered.length
401
+
402
+ first = filtered[0]
403
+ assert_equal 'test.no.change', first[0]
404
+ assert_equal time, first[1]
405
+
406
+ record = first[2]
407
+ assert_equal 4, record.keys.size
408
+ assert_equal original['data'], record['data']
409
+ assert_equal original['xxx'], record['xxx']
410
+ assert_equal original['yyy'], record['yyy']
411
+ assert_equal({"xxx"=>"first","yyy"=>"second"}, record['parsed'])
412
+
413
+ d = create_driver(CONFIG_HASH_VALUE_FIELD_WITH_INJECT_KEY_PREFIX, 'test.no.change')
414
+ time = Time.parse("2012-04-02 18:20:59").to_i
415
+ d.run do
416
+ d.filter(original, time)
417
+ end
418
+ filtered = d.filtered_as_array
419
+ assert_equal 1, filtered.length
420
+
421
+ first = filtered[0]
422
+ assert_equal 'test.no.change', first[0]
423
+ assert_equal time, first[1]
424
+
425
+ record = first[2]
426
+ assert_equal 1, record.keys.size
427
+ assert_equal({"data.xxx"=>"first","data.yyy"=>"second"}, record['parsed'])
428
+ end
429
+
430
+ CONFIG_DONT_PARSE_TIME = %[
431
+ key_name data
432
+ format json
433
+ time_parse no
434
+ ]
435
+ def test_time_should_be_reserved
436
+ t = Time.now.to_i
437
+ d = create_driver(CONFIG_DONT_PARSE_TIME, 'test.no.change')
438
+
439
+ d.run do
440
+ d.filter({'data' => '{"time":1383190430, "f1":"v1"}'}, t)
441
+ d.filter({'data' => '{"time":"1383190430", "f1":"v1"}'}, t)
442
+ d.filter({'data' => '{"time":"2013-10-31 12:34:03 +0900", "f1":"v1"}'}, t)
443
+ end
444
+ filtered = d.filtered_as_array
445
+ assert_equal 3, filtered.length
446
+
447
+ assert_equal 'test.no.change', filtered[0][0]
448
+ assert_equal 'v1', filtered[0][2]['f1']
449
+ assert_equal 1383190430, filtered[0][2]['time']
450
+ assert_equal t, filtered[0][1]
451
+
452
+ assert_equal 'test.no.change', filtered[1][0]
453
+ assert_equal 'v1', filtered[1][2]['f1']
454
+ assert_equal "1383190430", filtered[1][2]['time']
455
+ assert_equal t, filtered[1][1]
456
+
457
+ assert_equal 'test.no.change', filtered[2][0]
458
+ assert_equal 'v1', filtered[2][2]['f1']
459
+ assert_equal '2013-10-31 12:34:03 +0900', filtered[2][2]['time']
460
+ assert_equal t, filtered[2][1]
461
+ end
462
+
463
+ CONFIG_INVALID_TIME_VALUE = %[
464
+ remove_prefix test
465
+ key_name data
466
+ format json
467
+ ] # 'time' is implicit @time_key
468
+ def test_filter_invalid_time_data
469
+ # should not raise errors
470
+ t = Time.now.to_i
471
+ d = create_driver(CONFIG_INVALID_TIME_VALUE, 'test.no.change')
472
+ assert_nothing_raised {
473
+ d.run do
474
+ d.filter({'data' => '{"time":[], "f1":"v1"}'}, t)
475
+ d.filter({'data' => '{"time":"thisisnottime", "f1":"v1"}'}, t)
476
+ end
477
+ }
478
+ filtered = d.filtered_as_array
479
+ assert_equal 1, filtered.length
480
+
481
+ assert_equal 'test.no.change', filtered[0][0]
482
+ assert_equal 0, filtered[0][1]
483
+ assert_equal 'v1', filtered[0][2]['f1']
484
+ assert_equal 0, filtered[0][2]['time'].to_i
485
+ end
486
+
487
+ # REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
488
+
489
+ CONFIG_NOT_REPLACE = %[
490
+ remove_prefix test
491
+ key_name data
492
+ format /^(?<message>.*)$/
493
+ ]
494
+ CONFIG_INVALID_BYTE = CONFIG_NOT_REPLACE + %[
495
+ replace_invalid_sequence true
496
+ ]
497
+ def test_filter_invalid_byte
498
+ invalid_utf8 = "\xff".force_encoding('UTF-8')
499
+
500
+ d = create_driver(CONFIG_NOT_REPLACE, 'test.no.change')
501
+ assert_raise(ArgumentError) {
502
+ d.run do
503
+ d.filter({'data' => invalid_utf8}, Time.now.to_i)
504
+ end
505
+ }
506
+
507
+ d = create_driver(CONFIG_INVALID_BYTE, 'test.in')
508
+ assert_nothing_raised {
509
+ d.run do
510
+ d.emit({'data' => invalid_utf8}, Time.now.to_i)
511
+ end
512
+ }
513
+ filtered = d.filtered_as_array
514
+ assert_equal 1, filtered.length
515
+ assert_nil filtered[0][2]['data']
516
+ assert_equal '?'.force_encoding('UTF-8'), filtered[0][2]['message']
517
+
518
+ d = create_driver(CONFIG_INVALID_BYTE + %[
519
+ reserve_data yes
520
+ ], 'test.no.change')
521
+ assert_nothing_raised {
522
+ d.run do
523
+ d.filter({'data' => invalid_utf8}, Time.now.to_i)
524
+ end
525
+ }
526
+ filtered = d.filtered_as_array
527
+ assert_equal 1, filtered.length
528
+ assert_equal invalid_utf8, filtered[0][2]['data']
529
+ assert_equal '?'.force_encoding('UTF-8'), filtered[0][2]['message']
530
+
531
+ invalid_ascii = "\xff".force_encoding('US-ASCII')
532
+ d = create_driver(CONFIG_INVALID_BYTE, 'test.no.change')
533
+ assert_nothing_raised {
534
+ d.run do
535
+ d.filter({'data' => invalid_ascii}, Time.now.to_i)
536
+ end
537
+ }
538
+ filtered = d.filtered_as_array
539
+ assert_equal 1, filtered.length
540
+ assert_nil filtered[0][2]['data']
541
+ assert_equal '?'.force_encoding('US-ASCII'), filtered[0][2]['message']
542
+ end
543
+
544
+ # suppress_parse_error_log test
545
+ CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG = %[
546
+ tag hogelog
547
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
548
+ key_name message
549
+ suppress_parse_error_log false
550
+ ]
551
+ CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG = %[
552
+ tag hogelog
553
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
554
+ key_name message
555
+ suppress_parse_error_log true
556
+ ]
557
+ CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG = %[
558
+ tag hogelog
559
+ format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
560
+ key_name message
561
+ ]
562
+
563
+ INVALID_MESSAGE = 'foo bar'
564
+ VALID_MESSAGE = 'col1=foo col2=bar'
565
+
566
+ # Raised by DummyLogger#warn so that a test can detect that a warning was logged.
567
+ class DummyLoggerWarnedException < StandardError; end
568
+ class DummyLogger
569
+ def warn(message)
570
+ raise DummyLoggerWarnedException
571
+ end
572
+ end
573
+
574
# Runs the given block with +instance.log+ temporarily replaced by a
# DummyLogger, then puts the original logger back.
#
# The restore happens in an +ensure+ clause, so the original logger is
# reinstated even when the block raises (for example on a failed assertion).
#
# Raises RuntimeError ("use with block") when called without a block.
def swap_logger(instance)
  raise "use with block" unless block_given?
  saved_logger = instance.log
  instance.log = DummyLogger.new
  begin
    yield
  ensure
    instance.log = saved_logger
  end
end
585
+
586
+ def test_parser_error_warning
587
+ d = create_driver(CONFIG_INVALID_TIME_VALUE, 'test.no.change')
588
+ swap_logger(d.instance) do
589
+ assert_raise(DummyLoggerWarnedException) {
590
+ d.run do
591
+ d.filter({'data' => '{"time":[], "f1":"v1"}'}, Time.now.to_i)
592
+ end
593
+ }
594
+ end
595
+ end
596
+
597
+ class DefaultSuppressParseErrorLogTest < self
598
+ def setup
599
+ # default(disabled) 'suppress_parse_error_log' is not specify
600
+ @d = create_driver(CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG, 'test.no.change')
601
+ end
602
+
603
+ def test_raise_exception
604
+ swap_logger(@d.instance) do
605
+ assert_raise(DummyLoggerWarnedException) {
606
+ @d.run do
607
+ @d.filter({'message' => INVALID_MESSAGE}, Time.now.to_i)
608
+ end
609
+ }
610
+ end
611
+ end
612
+
613
+ def test_nothing_raised
614
+ swap_logger(@d.instance) do
615
+ assert_nothing_raised {
616
+ @d.run do
617
+ @d.filter({'message' => VALID_MESSAGE}, Time.now.to_i)
618
+ end
619
+ }
620
+ end
621
+ end
622
+ end
623
+
624
+ class DisabledSuppressParseErrorLogTest < self
625
+ def setup
626
+ # disabled 'suppress_parse_error_log'
627
+ @d = create_driver(CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.no.change')
628
+ end
629
+
630
+ def test_raise_exception
631
+ swap_logger(@d.instance) do
632
+ assert_raise(DummyLoggerWarnedException) {
633
+ @d.run do
634
+ @d.filter({'message' => INVALID_MESSAGE}, Time.now.to_i)
635
+ end
636
+ }
637
+ end
638
+ end
639
+
640
+ def test_nothing_raised
641
+ swap_logger(@d.instance) do
642
+ assert_nothing_raised {
643
+ @d.run do
644
+ @d.filter({'message' => VALID_MESSAGE}, Time.now.to_i)
645
+ end
646
+ }
647
+ end
648
+ end
649
+ end
650
+
651
+ class EnabledSuppressParseErrorLogTest < self
652
+ def setup
653
+ # enabled 'suppress_parse_error_log'
654
+ @d = create_driver(CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.no.change')
655
+ end
656
+
657
+ def test_nothing_raised
658
+ swap_logger(@d.instance) do
659
+ assert_nothing_raised {
660
+ @d.run do
661
+ @d.filter({'message' => INVALID_MESSAGE}, Time.now.to_i)
662
+ @d.filter({'message' => VALID_MESSAGE}, Time.now.to_i)
663
+ end
664
+ }
665
+ end
666
+ end
667
+ end
668
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-04 00:00:00.000000000 Z
11
+ date: 2015-09-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: test-unit
@@ -67,11 +67,15 @@ files:
67
67
  - README.md
68
68
  - Rakefile
69
69
  - fluent-plugin-parser.gemspec
70
+ - lib/fluent/plugin/filter_deparser.rb
71
+ - lib/fluent/plugin/filter_parser.rb
70
72
  - lib/fluent/plugin/out_deparser.rb
71
73
  - lib/fluent/plugin/out_parser.rb
72
74
  - test/custom_parser.rb
73
75
  - test/helper.rb
74
76
  - test/plugin/test_deparser.rb
77
+ - test/plugin/test_filter_deparser.rb
78
+ - test/plugin/test_filter_parser.rb
75
79
  - test/plugin/test_out_parser.rb
76
80
  - test/plugin/test_out_parser_for_parsers.rb
77
81
  homepage: https://github.com/tagomoris/fluent-plugin-parser
@@ -102,5 +106,8 @@ test_files:
102
106
  - test/custom_parser.rb
103
107
  - test/helper.rb
104
108
  - test/plugin/test_deparser.rb
109
+ - test/plugin/test_filter_deparser.rb
110
+ - test/plugin/test_filter_parser.rb
105
111
  - test/plugin/test_out_parser.rb
106
112
  - test/plugin/test_out_parser_for_parsers.rb
113
+ has_rdoc: