fluent-plugin-parser 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/fluent-plugin-parser.gemspec +1 -1
- data/lib/fluent/plugin/filter_deparser.rb +35 -0
- data/lib/fluent/plugin/filter_parser.rb +105 -0
- data/test/helper.rb +2 -0
- data/test/plugin/test_filter_deparser.rb +82 -0
- data/test/plugin/test_filter_parser.rb +668 -0
- metadata +9 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e389a96d9ae681aa25c0de95d9d1abb9b1c4189b
|
4
|
+
data.tar.gz: 3808413fd2cdd85483eeca9d01cdaed62e4fe1de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7174e622a987623dced94a830681e5d6f31742e2e2205964c709f9cccebca4a8a0b89b1ad082ded4cc9a8aedc8b71fe74e38af360bf33522476845639817d7c5
|
7
|
+
data.tar.gz: d6ea265808450c244ae34cc05efc62ef2e64a48b3b0c92327db373c49383d4d852b3ceb485dcf426d8200efc993fbf7ce3691ae8691f810296e85b19767781ef
|
data/fluent-plugin-parser.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |gem|
|
3
3
|
gem.name = "fluent-plugin-parser"
|
4
|
-
gem.version = "0.5.0"
|
4
|
+
gem.version = "0.6.0"
|
5
5
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
6
|
gem.email = ["tagomoris@gmail.com"]
|
7
7
|
gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
|
data/lib/fluent/plugin/filter_deparser.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Filter plugin that renders several record fields into one string field.
#
# Configuration:
#   format            - Kernel#format style template (e.g. "%s: %s %s")
#   format_key_names  - comma separated record keys whose values fill the template
#   key_name          - name of the field that receives the rendered string
#                       (default: 'message')
#   reserve_data      - when true the rendered field is merged into the original
#                       record; when false the output record holds only that field
#
# The plugin is registered with register_filter and implements filter_stream,
# so it derives from Fluent::Filter (like Fluent::ParserFilter does).
class Fluent::DeparserFilter < Fluent::Filter
  Fluent::Plugin.register_filter('deparser', self)

  config_param :format, :string
  config_param :format_key_names, :string
  config_param :key_name, :string, default: 'message'
  config_param :reserve_data, :bool, default: false

  # Splits format_key_names into an array and checks that the template can be
  # rendered with that many arguments.
  #
  # Raises Fluent::ConfigError when rendering the template with one dummy value
  # per key raises ArgumentError.
  def configure(conf)
    super

    @format_key_names = @format_key_names.split(',')
    begin
      # Dry run with placeholder values; only the ArgumentError matters here.
      @format % (["x"] * @format_key_names.length)
    rescue ArgumentError
      raise Fluent::ConfigError, "mismatch between placeholder of format and format_key_names"
    end
  end

  # Builds a new event stream in which every record carries the rendered
  # string under key_name. Event times are passed through unchanged.
  def filter_stream(tag, es)
    new_es = Fluent::MultiEventStream.new
    es.each do |time, record|
      rendered = {@key_name => (@format % @format_key_names.map { |k| record[k] })}
      # record.merge returns a new hash, so the incoming record is not mutated.
      new_es.add(time, @reserve_data ? record.merge(rendered) : rendered)
    end
    new_es
  end
end
|
data/lib/fluent/plugin/filter_parser.rb
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
require 'fluent/parser'
|
2
|
+
|
3
|
+
# Filter plugin that parses one string field of each record with a
# Fluent::TextParser and emits the parsed values as the new record.
#
# Configuration:
#   key_name                 - record key holding the text to parse
#   reserve_data             - merge parsed values into the original record
#                              (and keep unparsable records) instead of
#                              replacing the record
#   inject_key_prefix        - string prepended to every parsed key
#   replace_invalid_sequence - on "invalid byte sequence" errors, replace the
#                              offending bytes with '?' and parse again
#   hash_value_field         - store the parsed hash under this single key
#   suppress_parse_error_log - do not log warnings for unparsable input
#   time_parse               - when false, the parser's time_key is cleared so
#                              it does not extract a time from the text
class Fluent::ParserFilter < Fluent::Filter
  Fluent::Plugin.register_filter('parser', self)

  config_param :key_name, :string
  config_param :reserve_data, :bool, default: false
  config_param :inject_key_prefix, :string, default: nil
  config_param :replace_invalid_sequence, :bool, default: false
  config_param :hash_value_field, :string, default: nil
  config_param :suppress_parse_error_log, :bool, default: false
  config_param :time_parse, :bool, default: true

  attr_reader :parser

  def initialize
    super
    require 'time'
  end

  # Creates and configures the underlying Fluent::TextParser from +conf+.
  # Returns self.
  def configure(conf)
    super

    @parser = Fluent::TextParser.new
    @parser.estimate_current_event = false
    @parser.configure(conf)
    if !@time_parse && @parser.parser.respond_to?(:time_key=)
      # disable parse time
      @parser.parser.time_key = nil
    end

    self
  end

  # Parses record[key_name] of every event and returns a new event stream.
  #
  # Error handling per event:
  # * Fluent::TextParser::ParserError and any other StandardError raised by
  #   the first parse attempt are logged (unless suppressed) and the event is
  #   dropped.
  # * ArgumentError is re-raised unless replace_invalid_sequence is enabled
  #   and the message starts with "invalid byte sequence in"; in that case the
  #   value is sanitized and parsed once more. Errors raised by that second
  #   attempt are not rescued here and propagate to the caller.
  def filter_stream(tag, es)
    new_es = Fluent::MultiEventStream.new
    es.each do |time, record|
      raw_value = record[@key_name]
      begin
        parse_and_add(new_es, tag, time, record, raw_value, raw_value)
      rescue Fluent::TextParser::ParserError => e
        log.warn e.message unless @suppress_parse_error_log
      rescue ArgumentError => e
        raise unless @replace_invalid_sequence && e.message.index("invalid byte sequence in") == 0
        parse_and_add(new_es, tag, time, record, replace_invalid_byte(raw_value), raw_value)
      rescue => e
        log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
      end
    end
    new_es
  end

  private

  # Parses +text+ and appends the resulting event(s) to +new_es+.
  # +raw_value+ is the unmodified field value and is only used in the warning
  # message. When the parser yields no values the event is kept (with its
  # original time and no parsed fields) only if reserve_data is enabled.
  def parse_and_add(new_es, tag, time, record, text, raw_value)
    @parser.parse(text) do |t, values|
      if values
        t ||= time # fall back to the event time when the parser gave none
        new_es.add(t, handle_parsed(tag, record, t, values))
      else
        log.warn "pattern not match with data '#{raw_value}'" unless @suppress_parse_error_log
        new_es.add(time, handle_parsed(tag, record, time, {})) if @reserve_data
      end
    end
  end

  # Shapes the parsed +values+ into the outgoing record: applies
  # inject_key_prefix, nests under hash_value_field, and merges into the
  # original +record+ when reserve_data is enabled. +tag+ and +t+ are accepted
  # for interface compatibility and are not used.
  def handle_parsed(tag, record, t, values)
    if values && @inject_key_prefix
      values = Hash[values.map { |k, v| [@inject_key_prefix + k, v] }]
    end
    r = @hash_value_field ? {@hash_value_field => values} : values
    if @reserve_data
      r = r ? record.merge(r) : record
    end
    r
  end

  # Returns a copy of +string+ in its original encoding with invalid or
  # undefined byte sequences replaced by '?'. The string is transcoded to a
  # different encoding and back so that String#encode performs the
  # replacement.
  def replace_invalid_byte(string)
    replace_options = { invalid: :replace, undef: :replace, replace: '?' }
    original_encoding = string.encoding
    temporal_encoding = (original_encoding == Encoding::UTF_8 ? Encoding::UTF_16BE : Encoding::UTF_8)
    string.encode(temporal_encoding, original_encoding, replace_options).encode(original_encoding)
  end
end
|
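data/test/helper.rb
CHANGED
(hunk for data/test/helper.rb, +2 -0, is missing from this extract)
data/test/plugin/test_filter_deparser.rb
ADDED
@@ -0,0 +1,82 @@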
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for Fluent::DeparserFilter, driven through Fluent::Test::FilterTestDriver.
class DeparserFilterTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  # Default configuration: four fields rendered into 'fulltext', original
  # fields kept.
  CONFIG = %[
    format %s: %s %s %s
    format_key_names host,path,status,size
    key_name fulltext
    reserve_data true
  ]

  # Returns a filter test driver for DeparserFilter configured with +conf+.
  def create_driver(conf=CONFIG,tag='test')
    driver = Fluent::Test::FilterTestDriver.new(Fluent::DeparserFilter, tag)
    driver.configure(conf)
  end

  def test_configure
    # An empty configuration lacks the required parameters.
    assert_raise(Fluent::ConfigError) do
      create_driver('')
    end

    plugin = create_driver(%[
      format %s: %s %s
      format_key_names x,y,z
    ]).instance
    assert_equal '%s: %s %s', plugin.format
    assert_equal ['x','y','z'], plugin.format_key_names
    assert_equal 'message', plugin.key_name
    assert_equal false, plugin.reserve_data
  end

  def test_filter
    # Fresh input hashes for every driver run.
    build_records = lambda do
      [
        {'host'=>'xxx.local','path'=>'/f/1','status'=>'200','size'=>300},
        {'host'=>'yyy.local','path'=>'/f/2','status'=>'302','size'=>512},
      ]
    end

    # reserve_data true: rendered text is added next to the original fields.
    reserving = create_driver(CONFIG, 'test.no.change')
    time = Time.parse("2012-01-02 13:14:15").to_i
    reserving.run do
      build_records.call.each { |rec| reserving.filter(rec, time) }
    end
    filtered = reserving.filtered_as_array
    assert_equal 2, filtered.length
    ['xxx.local: /f/1 200 300', 'yyy.local: /f/2 302 512'].each_with_index do |fulltext, idx|
      event = filtered[idx]
      assert_equal 'test.no.change', event[0]
      assert_equal time, event[1]
      assert_equal fulltext, event[2]['fulltext']
      assert_equal ['fulltext','host','path','size','status'], event[2].keys.sort
    end

    # Defaults (key_name 'message', reserve_data false): only the rendered
    # field remains.
    replacing = create_driver(%[
      format %s: %s %s
      format_key_names host,path,status
    ], 'test.no.change')
    time = Time.parse("2012-01-02 13:14:15").to_i
    replacing.run do
      build_records.call.each { |rec| replacing.filter(rec, time) }
    end
    filtered = replacing.filtered_as_array
    assert_equal 2, filtered.length
    ['xxx.local: /f/1 200', 'yyy.local: /f/2 302'].each_with_index do |message, idx|
      event = filtered[idx]
      assert_equal 'test.no.change', event[0]
      assert_equal time, event[1]
      assert_equal message, event[2]['message']
      assert_equal ['message'], event[2].keys.sort
    end
  end
end
|
data/test/plugin/test_filter_parser.rb
ADDED
@@ -0,0 +1,668 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for Fluent::ParserFilter, driven through Fluent::Test::FilterTestDriver.
# Each filtered event is an array of [tag, time, record].
class ParserFilterTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  CONFIG = %[
    key_name message
    format /^(?<x>.)(?<y>.) (?<time>.+)$/
    time_format %Y%m%d%H%M%S
    reserve_data true
  ]

  # Returns a filter test driver for ParserFilter configured with +conf+.
  def create_driver(conf=CONFIG,tag='test')
    Fluent::Test::FilterTestDriver.new(Fluent::ParserFilter, tag).configure(conf)
  end

  def test_configure
    assert_raise(Fluent::ConfigError) {
      create_driver('')
    }
    assert_raise(Fluent::ConfigError) {
      create_driver %[
        format unknown_format_that_will_never_be_implemented
        key_name foo
      ]
    }

    # Every configuration below must be accepted.
    [
      %[
        format /(?<x>.)/
        key_name foo
      ],
      %[
        format json
        key_name foo
      ],
      %[
        format ltsv
        key_name foo
      ],
      %[
        format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
        key_name message
        suppress_parse_error_log true
      ],
      %[
        format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
        key_name message
        suppress_parse_error_log false
      ],
    ].each do |conf|
      assert_nothing_raised {
        create_driver conf
      }
    end

    d = create_driver %[
      key_name foo
      format /(?<x>.)/
    ]
    assert_equal false, d.instance.reserve_data
  end

  def test_filter
    # Time is taken from the parsed text; original fields are reserved.
    d1 = create_driver(CONFIG, 'test.no.change')
    time = Time.parse("2012-01-02 13:14:15").to_i
    d1.run do
      d1.filter({'message' => '12 20120402182059'}, time)
      d1.filter({'message' => '34 20120402182100'}, time)
      d1.filter({'message' => '56 20120402182100'}, time)
      d1.filter({'message' => '78 20120402182101'}, time)
      d1.filter({'message' => '90 20120402182100'}, time)
    end
    filtered = d1.filtered_as_array
    assert_equal 5, filtered.length

    [
      ['1', '2', "2012-04-02 18:20:59"],
      ['3', '4', "2012-04-02 18:21:00"],
      ['5', '6', "2012-04-02 18:21:00"],
      ['7', '8', "2012-04-02 18:21:01"],
      ['9', '0', "2012-04-02 18:21:00"],
    ].each_with_index do |(x, y, parsed_time), i|
      event = filtered[i]
      assert_equal 'test.no.change', event[0]
      assert_equal Time.parse(parsed_time).to_i, event[1]
      assert_equal x, event[2]['x']
      assert_equal y, event[2]['y']
    end
    assert_equal '12 20120402182059', filtered[0][2]['message']

    # No 'time' capture: event time is kept and the source field is dropped.
    d2 = create_driver(%[
      tag parsed
      key_name data
      format /^(?<x>.)(?<y>.) (?<t>.+)$/
    ], 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d2.run do
      d2.filter({'data' => '12 20120402182059'}, time)
      d2.filter({'data' => '34 20120402182100'}, time)
    end
    filtered = d2.filtered_as_array
    assert_equal 2, filtered.length

    first = filtered[0]
    assert_equal 'test.no.change', first[0]
    assert_equal time, first[1]
    assert_nil first[2]['data']
    assert_equal '1', first[2]['x']
    assert_equal '2', first[2]['y']
    assert_equal '20120402182059', first[2]['t']

    second = filtered[1]
    assert_equal 'test.no.change', second[0]
    assert_equal time, second[1]
    assert_nil second[2]['data']
    assert_equal '3', second[2]['x']
    assert_equal '4', second[2]['y']
    assert_equal '20120402182100', second[2]['t']

    # Unmatched input is dropped without reserve_data ...
    d3 = create_driver(%[
      tag parsed
      key_name data
      format /^(?<x>[0-9])(?<y>[0-9]) (?<t>.+)$/
    ], 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d3.run do
      d3.filter({'data' => '12 20120402182059'}, time)
      d3.filter({'data' => '34 20120402182100'}, time)
      d3.filter({'data' => 'xy 20120402182101'}, time)
    end
    filtered = d3.filtered_as_array
    assert_equal 2, filtered.length

    # ... and kept with reserve_data.
    d3x = create_driver(%[
      tag parsed
      key_name data
      format /^(?<x>\\d)(?<y>\\d) (?<t>.+)$/
      reserve_data yes
    ], 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d3x.run do
      d3x.filter({'data' => '12 20120402182059'}, time)
      d3x.filter({'data' => '34 20120402182100'}, time)
      d3x.filter({'data' => 'xy 20120402182101'}, time)
    end
    filtered = d3x.filtered_as_array
    assert_equal 3, filtered.length

    d4 = create_driver(%[
      tag parsed
      key_name data
      format json
    ], 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d4.run do
      d4.filter({'data' => '{"xxx":"first","yyy":"second"}', 'xxx' => 'x', 'yyy' => 'y'}, time)
      d4.filter({'data' => 'foobar', 'xxx' => 'x', 'yyy' => 'y'}, time)
    end
    filtered = d4.filtered_as_array
    assert_equal 1, filtered.length

    d4x = create_driver(%[
      tag parsed
      key_name data
      format json
      reserve_data yes
    ], 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d4x.run do
      d4x.filter({'data' => '{"xxx":"first","yyy":"second"}', 'xxx' => 'x', 'yyy' => 'y'}, time)
      d4x.filter({'data' => 'foobar', 'xxx' => 'x', 'yyy' => 'y'}, time)
    end
    filtered = d4x.filtered_as_array
    assert_equal 2, filtered.length

    first = filtered[0]
    assert_equal 'test.no.change', first[0]
    assert_equal time, first[1]
    assert_equal '{"xxx":"first","yyy":"second"}', first[2]['data']
    assert_equal 'first', first[2]['xxx']
    assert_equal 'second', first[2]['yyy']

    second = filtered[1]
    assert_equal 'test.no.change', second[0]
    assert_equal time, second[1]
    assert_equal 'foobar', second[2]['data']
    assert_equal 'x', second[2]['xxx']
    assert_equal 'y', second[2]['yyy']
  end

  CONFIG_LTSV = %[
    format ltsv
    key_name data
  ]
  def test_filter_ltsv
    d = create_driver(CONFIG_LTSV, 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d.run do
      d.filter({'data' => "xxx:first\tyyy:second", 'xxx' => 'x', 'yyy' => 'y'}, time)
      d.filter({'data' => "xxx:first\tyyy:second2", 'xxx' => 'x', 'yyy' => 'y'}, time)
    end
    filtered = d.filtered_as_array
    assert_equal 2, filtered.length

    first = filtered[0]
    assert_equal 'test.no.change', first[0]
    assert_equal time, first[1]
    assert_nil first[2]['data']
    assert_equal 'first', first[2]['xxx']
    assert_equal 'second', first[2]['yyy']

    second = filtered[1]
    assert_equal 'test.no.change', second[0]
    assert_equal time, second[1]
    assert_nil second[2]['data'] # check the second event, not the first again
    assert_equal 'first', second[2]['xxx']
    assert_equal 'second2', second[2]['yyy']

    d = create_driver(CONFIG_LTSV + %[
      reserve_data yes
    ], 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d.run do
      d.filter({'data' => "xxx:first\tyyy:second", 'xxx' => 'x', 'yyy' => 'y'}, time)
      d.filter({'data' => "xxx:first\tyyy:second2", 'xxx' => 'x', 'yyy' => 'y'}, time)
    end
    filtered = d.filtered_as_array
    assert_equal 2, filtered.length

    first = filtered[0]
    assert_equal 'test.no.change', first[0]
    assert_equal time, first[1]
    assert_equal "xxx:first\tyyy:second", first[2]['data']
    assert_equal 'first', first[2]['xxx']
    assert_equal 'second', first[2]['yyy']

    second = filtered[1]
    assert_equal 'test.no.change', second[0]
    assert_equal time, second[1]
    # the second event keeps its own raw input
    assert_equal "xxx:first\tyyy:second2", second[2]['data']
    assert_equal 'first', second[2]['xxx']
    assert_equal 'second2', second[2]['yyy']

    # convert types
    d = create_driver(CONFIG_LTSV + %[
      types i:integer,s:string,f:float,b:bool
    ], 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d.run do
      d.filter({'data' => "i:1\ts:2\tf:3\tb:true\tx:123"}, time)
    end
    filtered = d.filtered_as_array
    assert_equal 1, filtered.length

    first = filtered[0]
    assert_equal 'test.no.change', first[0]
    assert_equal time, first[1]
    assert_equal 1, first[2]['i']
    assert_equal '2', first[2]['s']
    assert_equal 3.0, first[2]['f']
    assert_equal true, first[2]['b']
    assert_equal '123', first[2]['x']
  end

  CONFIG_TSV = %[
    format tsv
    key_name data
    keys key1,key2,key3
  ]
  def test_filter_tsv
    d = create_driver(CONFIG_TSV, 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d.run do
      d.filter({'data' => "value1\tvalue2\tvalueThree", 'xxx' => 'x', 'yyy' => 'y'}, time)
    end
    filtered = d.filtered_as_array
    assert_equal 1, filtered.length

    first = filtered[0]
    assert_equal 'test.no.change', first[0]
    assert_equal time, first[1]
    assert_nil first[2]['data']
    assert_equal 'value1', first[2]['key1']
    assert_equal 'value2', first[2]['key2']
    assert_equal 'valueThree', first[2]['key3']
  end

  CONFIG_CSV = %[
    format csv
    key_name data
    keys key1,key2,key3
  ]
  def test_filter_csv
    d = create_driver(CONFIG_CSV, 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d.run do
      d.filter({'data' => 'value1,"value2","value""ThreeYes!"', 'xxx' => 'x', 'yyy' => 'y'}, time)
    end
    filtered = d.filtered_as_array
    assert_equal 1, filtered.length

    first = filtered[0]
    assert_equal 'test.no.change', first[0]
    assert_equal time, first[1]
    assert_nil first[2]['data']
    assert_equal 'value1', first[2]['key1']
    assert_equal 'value2', first[2]['key2']
    assert_equal 'value"ThreeYes!', first[2]['key3']
  end

  CONFIG_HASH_VALUE_FIELD = %[
    format json
    key_name data
    hash_value_field parsed
  ]
  CONFIG_HASH_VALUE_FIELD_RESERVE_DATA = %[
    format json
    key_name data
    reserve_data yes
    hash_value_field parsed
  ]
  CONFIG_HASH_VALUE_FIELD_WITH_INJECT_KEY_PREFIX = %[
    format json
    key_name data
    hash_value_field parsed
    inject_key_prefix data.
  ]
  def test_filter_inject_hash_value_field
    original = {'data' => '{"xxx":"first","yyy":"second"}', 'xxx' => 'x', 'yyy' => 'y'}

    d = create_driver(CONFIG_HASH_VALUE_FIELD, 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d.run do
      d.filter(original, time)
    end
    filtered = d.filtered_as_array
    assert_equal 1, filtered.length

    first = filtered[0]
    assert_equal 'test.no.change', first[0]
    assert_equal time, first[1]

    record = first[2]
    assert_equal 1, record.keys.size
    assert_equal({"xxx"=>"first","yyy"=>"second"}, record['parsed'])

    d = create_driver(CONFIG_HASH_VALUE_FIELD_RESERVE_DATA, 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d.run do
      d.filter(original, time)
    end
    filtered = d.filtered_as_array
    assert_equal 1, filtered.length

    first = filtered[0]
    assert_equal 'test.no.change', first[0]
    assert_equal time, first[1]

    record = first[2]
    assert_equal 4, record.keys.size
    assert_equal original['data'], record['data']
    assert_equal original['xxx'], record['xxx']
    assert_equal original['yyy'], record['yyy']
    assert_equal({"xxx"=>"first","yyy"=>"second"}, record['parsed'])

    d = create_driver(CONFIG_HASH_VALUE_FIELD_WITH_INJECT_KEY_PREFIX, 'test.no.change')
    time = Time.parse("2012-04-02 18:20:59").to_i
    d.run do
      d.filter(original, time)
    end
    filtered = d.filtered_as_array
    assert_equal 1, filtered.length

    first = filtered[0]
    assert_equal 'test.no.change', first[0]
    assert_equal time, first[1]

    record = first[2]
    assert_equal 1, record.keys.size
    assert_equal({"data.xxx"=>"first","data.yyy"=>"second"}, record['parsed'])
  end

  CONFIG_DONT_PARSE_TIME = %[
    key_name data
    format json
    time_parse no
  ]
  def test_time_should_be_reserved
    t = Time.now.to_i
    d = create_driver(CONFIG_DONT_PARSE_TIME, 'test.no.change')

    d.run do
      d.filter({'data' => '{"time":1383190430, "f1":"v1"}'}, t)
      d.filter({'data' => '{"time":"1383190430", "f1":"v1"}'}, t)
      d.filter({'data' => '{"time":"2013-10-31 12:34:03 +0900", "f1":"v1"}'}, t)
    end
    filtered = d.filtered_as_array
    assert_equal 3, filtered.length

    # With time_parse disabled the 'time' value stays in the record untouched
    # and the event keeps its original time.
    [1383190430, "1383190430", '2013-10-31 12:34:03 +0900'].each_with_index do |time_value, i|
      assert_equal 'test.no.change', filtered[i][0]
      assert_equal 'v1', filtered[i][2]['f1']
      assert_equal time_value, filtered[i][2]['time']
      assert_equal t, filtered[i][1]
    end
  end

  CONFIG_INVALID_TIME_VALUE = %[
    remove_prefix test
    key_name data
    format json
  ] # 'time' is implicit @time_key
  def test_filter_invalid_time_data
    # should not raise errors
    t = Time.now.to_i
    d = create_driver(CONFIG_INVALID_TIME_VALUE, 'test.no.change')
    assert_nothing_raised {
      d.run do
        d.filter({'data' => '{"time":[], "f1":"v1"}'}, t)
        d.filter({'data' => '{"time":"thisisnottime", "f1":"v1"}'}, t)
      end
    }
    filtered = d.filtered_as_array
    assert_equal 1, filtered.length

    assert_equal 'test.no.change', filtered[0][0]
    assert_equal 0, filtered[0][1]
    assert_equal 'v1', filtered[0][2]['f1']
    assert_equal 0, filtered[0][2]['time'].to_i
  end

  # REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/

  CONFIG_NOT_REPLACE = %[
    remove_prefix test
    key_name data
    format /^(?<message>.*)$/
  ]
  CONFIG_INVALID_BYTE = CONFIG_NOT_REPLACE + %[
    replace_invalid_sequence true
  ]
  def test_filter_invalid_byte
    invalid_utf8 = "\xff".force_encoding('UTF-8')

    # Without replace_invalid_sequence the ArgumentError reaches the caller.
    d = create_driver(CONFIG_NOT_REPLACE, 'test.no.change')
    assert_raise(ArgumentError) {
      d.run do
        d.filter({'data' => invalid_utf8}, Time.now.to_i)
      end
    }

    d = create_driver(CONFIG_INVALID_BYTE, 'test.in')
    assert_nothing_raised {
      d.run do
        d.filter({'data' => invalid_utf8}, Time.now.to_i)
      end
    }
    filtered = d.filtered_as_array
    assert_equal 1, filtered.length
    assert_nil filtered[0][2]['data']
    assert_equal '?'.force_encoding('UTF-8'), filtered[0][2]['message']

    d = create_driver(CONFIG_INVALID_BYTE + %[
      reserve_data yes
    ], 'test.no.change')
    assert_nothing_raised {
      d.run do
        d.filter({'data' => invalid_utf8}, Time.now.to_i)
      end
    }
    filtered = d.filtered_as_array
    assert_equal 1, filtered.length
    assert_equal invalid_utf8, filtered[0][2]['data']
    assert_equal '?'.force_encoding('UTF-8'), filtered[0][2]['message']

    invalid_ascii = "\xff".force_encoding('US-ASCII')
    d = create_driver(CONFIG_INVALID_BYTE, 'test.no.change')
    assert_nothing_raised {
      d.run do
        d.filter({'data' => invalid_ascii}, Time.now.to_i)
      end
    }
    filtered = d.filtered_as_array
    assert_equal 1, filtered.length
    assert_nil filtered[0][2]['data']
    assert_equal '?'.force_encoding('US-ASCII'), filtered[0][2]['message']
  end

  # suppress_parse_error_log test
  CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG = %[
    tag hogelog
    format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
    key_name message
    suppress_parse_error_log false
  ]
  CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG = %[
    tag hogelog
    format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
    key_name message
    suppress_parse_error_log true
  ]
  CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG = %[
    tag hogelog
    format /^col1=(?<col1>.+) col2=(?<col2>.+)$/
    key_name message
  ]

  INVALID_MESSAGE = 'foo bar'
  VALID_MESSAGE = 'col1=foo col2=bar'

  # Logger stand-in whose warn() raises, so a test can detect that a warning
  # was issued.
  class DummyLoggerWarnedException < StandardError; end
  class DummyLogger
    def warn(message)
      raise DummyLoggerWarnedException
    end
  end

  # Runs the given block with +instance+.log replaced by a DummyLogger.
  # The original logger is restored even when the block raises.
  def swap_logger(instance)
    raise "use with block" unless block_given?
    saved_logger = instance.log
    instance.log = DummyLogger.new
    begin
      yield
    ensure
      instance.log = saved_logger
    end
  end

  def test_parser_error_warning
    d = create_driver(CONFIG_INVALID_TIME_VALUE, 'test.no.change')
    swap_logger(d.instance) do
      assert_raise(DummyLoggerWarnedException) {
        d.run do
          d.filter({'data' => '{"time":[], "f1":"v1"}'}, Time.now.to_i)
        end
      }
    end
  end

  class DefaultSuppressParseErrorLogTest < self
    def setup
      # default(disabled) 'suppress_parse_error_log' is not specified
      @d = create_driver(CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG, 'test.no.change')
    end

    def test_raise_exception
      swap_logger(@d.instance) do
        assert_raise(DummyLoggerWarnedException) {
          @d.run do
            @d.filter({'message' => INVALID_MESSAGE}, Time.now.to_i)
          end
        }
      end
    end

    def test_nothing_raised
      swap_logger(@d.instance) do
        assert_nothing_raised {
          @d.run do
            @d.filter({'message' => VALID_MESSAGE}, Time.now.to_i)
          end
        }
      end
    end
  end

  class DisabledSuppressParseErrorLogTest < self
    def setup
      # disabled 'suppress_parse_error_log'
      @d = create_driver(CONFIG_DISABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.no.change')
    end

    def test_raise_exception
      swap_logger(@d.instance) do
        assert_raise(DummyLoggerWarnedException) {
          @d.run do
            @d.filter({'message' => INVALID_MESSAGE}, Time.now.to_i)
          end
        }
      end
    end

    def test_nothing_raised
      swap_logger(@d.instance) do
        assert_nothing_raised {
          @d.run do
            @d.filter({'message' => VALID_MESSAGE}, Time.now.to_i)
          end
        }
      end
    end
  end

  class EnabledSuppressParseErrorLogTest < self
    def setup
      # enabled 'suppress_parse_error_log'
      @d = create_driver(CONFIG_ENABELED_SUPPRESS_PARSE_ERROR_LOG, 'test.no.change')
    end

    def test_nothing_raised
      swap_logger(@d.instance) do
        assert_nothing_raised {
          @d.run do
            @d.filter({'message' => INVALID_MESSAGE}, Time.now.to_i)
            @d.filter({'message' => VALID_MESSAGE}, Time.now.to_i)
          end
        }
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-09-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: test-unit
|
@@ -67,11 +67,15 @@ files:
|
|
67
67
|
- README.md
|
68
68
|
- Rakefile
|
69
69
|
- fluent-plugin-parser.gemspec
|
70
|
+
- lib/fluent/plugin/filter_deparser.rb
|
71
|
+
- lib/fluent/plugin/filter_parser.rb
|
70
72
|
- lib/fluent/plugin/out_deparser.rb
|
71
73
|
- lib/fluent/plugin/out_parser.rb
|
72
74
|
- test/custom_parser.rb
|
73
75
|
- test/helper.rb
|
74
76
|
- test/plugin/test_deparser.rb
|
77
|
+
- test/plugin/test_filter_deparser.rb
|
78
|
+
- test/plugin/test_filter_parser.rb
|
75
79
|
- test/plugin/test_out_parser.rb
|
76
80
|
- test/plugin/test_out_parser_for_parsers.rb
|
77
81
|
homepage: https://github.com/tagomoris/fluent-plugin-parser
|
@@ -102,5 +106,8 @@ test_files:
|
|
102
106
|
- test/custom_parser.rb
|
103
107
|
- test/helper.rb
|
104
108
|
- test/plugin/test_deparser.rb
|
109
|
+
- test/plugin/test_filter_deparser.rb
|
110
|
+
- test/plugin/test_filter_parser.rb
|
105
111
|
- test/plugin/test_out_parser.rb
|
106
112
|
- test/plugin/test_out_parser_for_parsers.rb
|
113
|
+
has_rdoc:
|