fluent-plugin-multiline-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +36 -0
- data/Gemfile +3 -0
- data/LICENSE +201 -0
- data/README.md +1 -0
- data/Rakefile +11 -0
- data/fluent-plugin-multiline-parser.gemspec +21 -0
- data/lib/fluent/plugin/filter_deparser.rb +35 -0
- data/lib/fluent/plugin/filter_parser.rb +138 -0
- data/lib/fluent/plugin/out_deparser.rb +94 -0
- data/lib/fluent/plugin/out_parser.rb +172 -0
- data/test/custom_parser.rb +39 -0
- data/test/helper.rb +31 -0
- data/test/plugin/test_deparser.rb +149 -0
- data/test/plugin/test_filter_deparser.rb +82 -0
- data/test/plugin/test_filter_parser.rb +668 -0
- data/test/plugin/test_out_parser.rb +702 -0
- data/test/plugin/test_out_parser_for_parsers.rb +285 -0
- metadata +125 -0
@@ -0,0 +1,94 @@
|
|
1
|
+
class Fluent::DeparserOutput < Fluent::Output
|
2
|
+
Fluent::Plugin.register_output('deparser', self)
|
3
|
+
|
4
|
+
config_param :tag, :string, :default => nil
|
5
|
+
config_param :remove_prefix, :string, :default => nil
|
6
|
+
config_param :add_prefix, :string, :default => nil
|
7
|
+
config_param :format, :string
|
8
|
+
config_param :format_key_names, :string
|
9
|
+
config_param :key_name, :string, :default => 'message'
|
10
|
+
config_param :reserve_data, :bool, :default => false
|
11
|
+
|
12
|
+
# Joke emitter that replaces `emit` on instances configured with `tag april.fool`.
#
# For each (time, record) pair in +es+, the record's values are joined with a
# single space in a randomly shuffled key order, stored under `@key_name`, and
# emitted through `router` with `@tag`. The incoming +tag+ is ignored.
# Calls `chain.next` once every event has been emitted.
def april_fool_emit(tag, es, chain)
  es.each do |time, record|
    scrambled_values = record.keys.shuffle.map { |key| record[key] }
    router.emit(@tag, time, { @key_name => scrambled_values.join(' ') })
  end
  chain.next
end
|
20
|
+
|
21
|
+
# Define `log` method for v0.10.42 or earlier
|
22
|
+
unless method_defined?(:log)
|
23
|
+
define_method("log") { $log }
|
24
|
+
end
|
25
|
+
|
26
|
+
# Validates the configuration and precomputes the values `emit` relies on.
#
# `tag april.fool` is a special case: `format` and `format_key_names` are
# filled in before `super` runs, then `emit` is replaced on this instance by
# `april_fool_emit` and the remaining validation is skipped.
#
# Otherwise:
# * one of `tag`, `remove_prefix` or `add_prefix` must be given;
# * `tag` cannot be combined with `remove_prefix` / `add_prefix`;
# * `format_key_names` is split on commas;
# * `format` is test-rendered with one placeholder value per key name.
#
# Raises Fluent::ConfigError when any of these checks fails.
def configure(conf)
  if conf['tag'] == 'april.fool'
    conf['format'] = '%s'
    conf['format_key_names'] = 'x'
  end

  super

  if @tag == 'april.fool'
    define_singleton_method(:emit, method(:april_fool_emit))
    return
  end

  unless @tag || @remove_prefix || @add_prefix
    raise Fluent::ConfigError, "missing both of remove_prefix and add_prefix"
  end
  if @tag && (@remove_prefix || @add_prefix)
    raise Fluent::ConfigError, "both of tag and remove_prefix/add_prefix must not be specified"
  end

  if @remove_prefix
    @removed_prefix_string = "#{@remove_prefix}."
    @removed_length = @removed_prefix_string.length
  end
  @added_prefix_string = "#{@add_prefix}." if @add_prefix

  @format_key_names = @format_key_names.split(',')
  begin
    # Dry run: String#% raises ArgumentError when the format has more
    # placeholders than there are key names.
    @format % (["x"] * @format_key_names.length)
  rescue ArgumentError
    raise Fluent::ConfigError, "mismatch between placeholder of format and format_key_names"
  end
end
|
63
|
+
|
64
|
+
# Renders one formatted string per event and re-emits it under the rewritten tag.
#
# tag   - incoming tag. Replaced by `@tag` when that is set; otherwise
#         `remove_prefix` is stripped (if it matches) and `add_prefix` is
#         prepended.
# es    - event stream; iterated with `each`, yielding (time, record) pairs.
# chain - output chain; `next` is called after all events have been emitted.
#
# The string is `@format` filled with the record's values for
# `@format_key_names` and is stored under `@key_name`. With `reserve_data` it
# is merged into a copy of the record, so the incoming record is never
# modified; without it only the formatted field is emitted.
def emit(tag, es, chain)
  out_tag = @tag
  unless out_tag
    out_tag = tag
    strippable = @remove_prefix &&
                 ((tag.start_with?(@removed_prefix_string) && tag.length > @removed_length) ||
                  tag == @remove_prefix)
    # When tag equals remove_prefix exactly, the slice starts past the end of
    # the string and yields nil; add_prefix (if any) then becomes the whole tag.
    out_tag = tag[@removed_length..-1] if strippable
    if @add_prefix
      out_tag = (out_tag && !out_tag.empty?) ? @added_prefix_string + out_tag : @add_prefix
    end
  end

  es.each do |time, record|
    formatted = { @key_name => (@format % @format_key_names.map { |key| record[key] }) }
    # merge (not update): the event stream's record must stay untouched.
    router.emit(out_tag, time, @reserve_data ? record.merge(formatted) : formatted)
  end
  chain.next
end
|
94
|
+
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
require 'fluent/parser'
|
2
|
+
require 'thread_safe'
|
3
|
+
|
4
|
+
class Fluent::ParserOutput < Fluent::Output
|
5
|
+
|
6
|
+
@@lines_buffer = ThreadSafe::Hash.new
|
7
|
+
Fluent::Plugin.register_output('parser', self)
|
8
|
+
|
9
|
+
config_param :tag, :string, :default => nil
|
10
|
+
config_param :remove_prefix, :string, :default => nil
|
11
|
+
config_param :add_prefix, :string, :default => nil
|
12
|
+
config_param :key_name, :string
|
13
|
+
config_param :reserve_data, :bool, :default => false
|
14
|
+
config_param :inject_key_prefix, :string, :default => nil
|
15
|
+
config_param :replace_invalid_sequence, :bool, :default => false
|
16
|
+
config_param :hash_value_field, :string, :default => nil
|
17
|
+
config_param :suppress_parse_error_log, :bool, :default => false
|
18
|
+
config_param :time_parse, :bool, :default => true
|
19
|
+
|
20
|
+
attr_reader :parser
|
21
|
+
|
22
|
+
# Runs the parent initializer, then loads the standard `time` library when the
# plugin is instantiated rather than when this file is required.
def initialize
  super
  require 'time'
end
|
26
|
+
|
27
|
+
# Define `log` method for v0.10.42 or earlier
|
28
|
+
unless method_defined?(:log)
|
29
|
+
define_method("log") { $log }
|
30
|
+
end
|
31
|
+
|
32
|
+
# Validates the tag options, selects the per-event handler and builds the
# underlying text parser.
#
# * One of `tag`, `remove_prefix` or `add_prefix` must be given, and `tag`
#   cannot be combined with the other two (Fluent::ConfigError otherwise).
# * When the `format` value contains "multiline", events are routed through
#   `parse_multilines`; otherwise through `parse_singleline`.
# * The parser is created from `conf['format']` and configured with `conf`.
#   When `time_parse` is false and the inner parser exposes `time_key=`, its
#   time key is cleared.
#
# Returns self.
def configure(conf)
  super

  unless @tag || @remove_prefix || @add_prefix
    raise Fluent::ConfigError, "missing both of remove_prefix and add_prefix"
  end
  if @tag && (@remove_prefix || @add_prefix)
    raise Fluent::ConfigError, "both of tag and remove_prefix/add_prefix must not be specified"
  end

  if @remove_prefix
    @removed_prefix_string = "#{@remove_prefix}."
    @removed_length = @removed_prefix_string.length
  end
  @added_prefix_string = "#{@add_prefix}." if @add_prefix

  @multiline_mode = conf['format'] =~ /multiline/
  handler_name = @multiline_mode ? :parse_multilines : :parse_singleline
  @receive_handler = method(handler_name)

  @parser = Fluent::Plugin.new_parser(conf['format'])
  @parser.estimate_current_event = false
  @parser.configure(conf)
  # disable parse time
  @parser.parser.time_key = nil if !@time_parse && @parser.parser.respond_to?(:time_key=)

  self
end
|
65
|
+
# Parses one complete line and emits the result through `handle_parsed`.
#
# tag    - tag to emit under
# time   - event time, used when the parser yields no time of its own
# record - source record, passed on to `handle_parsed`
# line   - text to parse; one trailing line terminator is stripped from a
#          copy, so the caller's string (which may be frozen, or be the
#          record's own field) is never modified
#
# When the parser yields no values a warning is logged (unless
# `suppress_parse_error_log` is set) and, with `reserve_data`, the record is
# still emitted with an empty parse result.
def parse_singleline(tag, time, record, line)
  text = line.chomp
  @parser.parse(text) do |parsed_time, values|
    if values
      handle_parsed(tag, record, parsed_time || time, values)
    else
      log.warn "pattern not match with data #{tag} '#{text}'" unless @suppress_parse_error_log
      handle_parsed(tag, record, time, {}) if @reserve_data
    end
  end
end
|
80
|
+
|
81
|
+
# Accumulates lines per tag until the parser's first-line pattern matches
# again, then parses the buffered text as one event.
#
# tag, time, record - forwarded to `parse_singleline` when a buffered event is
#                     flushed
# line              - text of the current event
#
# Behaviour:
# * no buffer for the tag yet  -> start one with this line;
# * line has no first-line hit -> append it to the buffer;
# * line has a first-line hit  -> any text before the hit still belongs to
#   the buffered event; the buffer is parsed and a new buffer starts at the hit.
#
# The buffer always holds a private copy of the text, so appending to it never
# alters a record's own string.
#
# NOTE(review): the flushed event is parsed with the time/record of the line
# that triggered the flush, not of the lines that were buffered — confirm this
# is intended.
def parse_multilines(tag, time, record, line)
  if @@lines_buffer.key?(tag)
    matches = @parser.firstline?(line)
    if matches
      index = line.index(matches[0])
      if index && index > 0
        # Exclusive range: the character at `index` is the first character of
        # the next event and must not be appended to the previous one as well.
        @@lines_buffer[tag] << line[0...index]
        line = line[index..-1]
      end
      parse_singleline(tag, time, record, @@lines_buffer[tag])
      @@lines_buffer[tag] = line.dup
    else
      @@lines_buffer[tag] << line
    end
  else
    @@lines_buffer[tag] = line.dup
  end
end
|
99
|
+
|
100
|
+
|
101
|
+
# Parses the `@key_name` field of every event and re-emits the result under
# the rewritten tag.
#
# tag   - incoming tag. Replaced by `@tag` when that is set; otherwise
#         `remove_prefix` is stripped (if it matches) and `add_prefix` is
#         prepended.
# es    - event stream; iterated with `each`, yielding (time, record) pairs.
# chain - output chain; `next` is called after all events were handled.
#
# Error handling per event:
# * Fluent::TextParser::ParserError -> logged as a warning;
# * ArgumentError starting with "invalid byte sequence in" while
#   `replace_invalid_sequence` is set -> the value is sanitised with
#   `replace_invalid_byte` and parsed again directly with `@parser`;
#   any other ArgumentError is re-raised;
# * any other StandardError -> logged as "parse failed ...".
# Warnings are skipped when `suppress_parse_error_log` is set.
def emit(tag, es, chain)
  out_tag = @tag
  unless out_tag
    out_tag = tag
    if @remove_prefix &&
       ((tag.start_with?(@removed_prefix_string) && tag.length > @removed_length) || tag == @remove_prefix)
      out_tag = tag[@removed_length..-1]
    end
    if @add_prefix
      out_tag = (out_tag && !out_tag.empty?) ? @added_prefix_string + out_tag : @add_prefix
    end
  end

  es.each do |time, record|
    raw_value = record[@key_name]
    begin
      @receive_handler.call(out_tag, time, record, raw_value)
    rescue Fluent::TextParser::ParserError => e
      log.warn e.message unless @suppress_parse_error_log
    rescue ArgumentError => e
      raise unless @replace_invalid_sequence && e.message.start_with?("invalid byte sequence in")

      sanitized = replace_invalid_byte(raw_value)
      @parser.parse(sanitized) do |parsed_time, values|
        if values
          handle_parsed(out_tag, record, parsed_time || time, values)
        else
          log.warn "pattern not match with data '#{raw_value}'" unless @suppress_parse_error_log
          handle_parsed(out_tag, record, time, {}) if @reserve_data
        end
      end
    rescue => e
      log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
    end
  end

  chain.next
end
|
152
|
+
|
153
|
+
private
|
154
|
+
|
155
|
+
# Shapes a parse result according to the plugin options and emits it.
#
# tag    - tag to emit under
# record - source record; merged underneath the result when `reserve_data` is set
# t      - event time to emit with
# values - parsed key/value pairs (may be nil)
#
# `inject_key_prefix` is prepended to every parsed key, `hash_value_field`
# nests the parsed values under that single key, and `reserve_data` merges the
# result into a copy of the source record (or emits the record itself when
# there is no result).
def handle_parsed(tag, record, t, values)
  if values && @inject_key_prefix
    values = values.each_with_object({}) do |(key, value), prefixed|
      prefixed[@inject_key_prefix + key] = value
    end
  end

  payload = @hash_value_field ? { @hash_value_field => values } : values
  payload = (payload ? record.merge(payload) : record) if @reserve_data

  router.emit(tag, t, payload)
end
|
165
|
+
|
166
|
+
# Returns a copy of +string+ in its original encoding with every invalid or
# undefined byte sequence replaced by '?'.
#
# The string is transcoded to a different encoding and back: the first
# `encode` call is the one that performs the replacement, the second restores
# the original encoding. UTF-16BE is the intermediate for UTF-8 input, UTF-8
# for everything else.
#
# The options are written as a trailing braceless hash so they are passed as
# keyword arguments on Ruby 3 (which rejects a positional options Hash here)
# and as an options hash on older versions.
def replace_invalid_byte(string)
  original_encoding = string.encoding
  temporal_encoding = (original_encoding == Encoding::UTF_8 ? Encoding::UTF_16BE : Encoding::UTF_8)
  string.encode(temporal_encoding, original_encoding,
                invalid: :replace, undef: :replace, replace: '?')
        .encode(original_encoding)
end
|
172
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Fluent
|
2
|
+
class TextParser
|
3
|
+
class KVPairParser < Parser
|
4
|
+
Plugin.register_parser('kv_pair', self)
|
5
|
+
|
6
|
+
# key<delim1>value is pair and <pair><delim2><pair> ...
|
7
|
+
# newline splits records
|
8
|
+
include Configurable
|
9
|
+
|
10
|
+
config_param :delim1, :string
|
11
|
+
config_param :delim2, :string
|
12
|
+
|
13
|
+
config_param :time_key, :string, :default => "time"
|
14
|
+
config_param :time_format, :string, :default => nil # time_format is configurable
|
15
|
+
|
16
|
+
# Applies the declared config parameters via `super`, then builds the
# TimeParser used for the `time_key` field from the configured `time_format`.
def configure(conf)
  super
  @time_parser = TimeParser.new(@time_format)
end
|
20
|
+
|
21
|
+
# Parses key/value text and yields one (time, record) pair per input line.
#
# text - input in which "\n" separates records, `@delim2` separates pairs
#        within a record and `@delim1` separates a key from its value (only
#        the first `@delim1` in a pair splits, so values may contain it).
#
# The pair whose key equals `@time_key` is parsed with `@time_parser` and
# yielded as the time instead of being stored in the record; time is nil when
# the line has no such pair.
def parse(text)
  text.split("\n").each do |line|
    record = {}
    time = nil
    # Split the current line, not the whole text, so each line yields its own record.
    line.split(@delim2).each do |pair|
      key, value = pair.split(@delim1, 2)
      if key == @time_key
        time = @time_parser.parse(value)
      else
        record[key] = value
      end
    end
    yield time, record
  end
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Shared bootstrap for the test suite: resolves gems through Bundler, puts
# lib/ and test/ on the load path, silences the global logger unless VERBOSE
# is set, and loads the plugins under test.
require 'rubygems'
require 'bundler'

begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # Report the Bundler problem and exit with Bundler's own status code.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'test/unit'

$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'fluent/test'

unless ENV.key?('VERBOSE')
  # Null logger: answers every message with nil so log calls are no-ops.
  nulllogger = Object.new
  def nulllogger.method_missing(method, *args)
    # pass
  end
  $log = nulllogger
end

require 'fluent/plugin/out_parser'
require 'fluent/plugin/out_deparser'
require 'fluent/plugin/filter_parser'
require 'fluent/plugin/filter_deparser'

class Test::Unit::TestCase
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for Fluent::DeparserOutput: configuration validation and the records
# it emits with and without reserve_data.
class DeparserOutputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  # Default configuration: swaps the "test" tag prefix for "combined", renders
  # host/path/status/size into "fulltext" and keeps the source fields.
  CONFIG = [
    '',
    'remove_prefix test',
    'add_prefix combined',
    'format %s: %s %s %s',
    'format_key_names host,path,status,size',
    'key_name fulltext',
    'reserve_data true',
    ''
  ].join("\n")

  # Returns an output test driver for the plugin, fed under +tag+ and
  # configured with +conf+.
  def create_driver(conf=CONFIG,tag='test')
    driver = Fluent::Test::OutputTestDriver.new(Fluent::DeparserOutput, tag)
    driver.configure(conf)
  end

  def test_configure
    # The april.fool tag needs no format / format_key_names.
    assert_nothing_raised {
      create_driver "\ntag april.fool\n"
    }

    # Each of these is rejected: no tag option at all, a missing format or
    # format_key_names, tag combined with a prefix option, or more
    # placeholders than key names.
    invalid_configs = [
      '',
      "\ntag foo.bar\n",
      "\nformat %s\nformat_key_names x\n",
      "\ntag foo.bar\nremove_prefix foo\nformat %s\nformat_key_names x\n",
      "\ntag foo.bar\nadd_prefix foo\nformat %s\nformat_key_names x\n",
      "\ntag foo.bar\nformat_key_names x\n",
      "\ntag foo.bar\nformat %s\n",
      "\ntag foo.bar\nformat %s %s %s\nformat_key_names x,y\n"
    ]
    invalid_configs.each do |conf|
      assert_raise(Fluent::ConfigError) {
        create_driver(conf)
      }
    end

    assert_nothing_raised(Fluent::ConfigError) {
      # mmm...
      create_driver "\ntag foo.bar\nformat %s %s\nformat_key_names x,y,z\n"
    }

    plugin = create_driver("\ntag foo.bar\nformat %s: %s %s\nformat_key_names x,y,z\n").instance
    assert_equal 'foo.bar', plugin.tag
    assert_equal '%s: %s %s', plugin.format
    assert_equal ['x','y','z'], plugin.format_key_names
    assert_equal 'message', plugin.key_name
    assert_equal false, plugin.reserve_data
  end

  # The first half runs against CONFIG (remove_prefix test, add_prefix
  # combined, format_key_names host,path,status,size, key_name fulltext,
  # reserve_data true); the second half uses a fixed tag and the default
  # key_name without reserve_data.
  def test_emit
    # Fresh hashes on every call, so no record is shared between the two drivers.
    sample_records = lambda {
      [
        {'host'=>'xxx.local','path'=>'/f/1','status'=>'200','size'=>300},
        {'host'=>'yyy.local','path'=>'/f/2','status'=>'302','size'=>512}
      ]
    }

    reserving = create_driver(CONFIG, 'test.in')
    time = Time.parse("2012-01-02 13:14:15").to_i
    reserving.run do
      sample_records.call.each { |record| reserving.emit(record, time) }
    end
    emits = reserving.emits
    assert_equal 2, emits.length
    ['xxx.local: /f/1 200 300', 'yyy.local: /f/2 302 512'].each_with_index do |fulltext, i|
      out_tag, out_time, out_record = emits[i]
      assert_equal 'combined.in', out_tag
      assert_equal time, out_time
      assert_equal fulltext, out_record['fulltext']
      assert_equal ['fulltext','host','path','size','status'], out_record.keys.sort
    end

    replacing = create_driver("\ntag combined\nformat %s: %s %s\nformat_key_names host,path,status\n", 'test.in')
    time = Time.parse("2012-01-02 13:14:15").to_i
    replacing.run do
      sample_records.call.each { |record| replacing.emit(record, time) }
    end
    emits = replacing.emits
    assert_equal 2, emits.length
    ['xxx.local: /f/1 200', 'yyy.local: /f/2 302'].each_with_index do |message, i|
      out_tag, out_time, out_record = emits[i]
      assert_equal 'combined', out_tag
      assert_equal time, out_time
      assert_equal message, out_record['message']
      assert_equal ['message'], out_record.keys.sort
    end
  end
end
|