fluent-plugin-multiline-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,94 @@
1
# Fluentd output plugin registered as 'deparser'.
#
# For every incoming event it builds one string by applying the +format+
# template to the record values named in +format_key_names+, stores that
# string under +key_name+ and re-emits the event. The outgoing tag is either
# the fixed +tag+ parameter or the incoming tag with +remove_prefix+ stripped
# and/or +add_prefix+ prepended.
class Fluent::DeparserOutput < Fluent::Output
  Fluent::Plugin.register_output('deparser', self)

  config_param :tag, :string, :default => nil
  config_param :remove_prefix, :string, :default => nil
  config_param :add_prefix, :string, :default => nil
  config_param :format, :string
  config_param :format_key_names, :string
  config_param :key_name, :string, :default => 'message'
  config_param :reserve_data, :bool, :default => false

  # Define `log` method for v0.10.42 or earlier
  define_method("log") { $log } unless method_defined?(:log)

  # Replacement for #emit that is installed when the plugin is configured
  # with the tag 'april.fool': joins all record values, in shuffled key
  # order, with single spaces and emits the result under +key_name+.
  def april_fool_emit(tag, es, chain)
    es.each do |time, record|
      scrambled = record.keys.shuffle.map { |k| record[k] }.join(' ')
      router.emit(@tag, time, { @key_name => scrambled })
    end
    chain.next
  end

  # Validates the configuration and precomputes the tag prefixes.
  #
  # Raises Fluent::ConfigError when no tag rule is given, when +tag+ is
  # combined with a prefix rule, or when +format+ needs more values than
  # +format_key_names+ supplies. After this call +format_key_names+ is an
  # Array of key names rather than the raw comma-separated String.
  def configure(conf)
    # The 'april.fool' tag needs no format settings from the user, so the
    # required parameters are filled in before the base class validates them.
    if conf['tag'] == 'april.fool'
      conf['format'] = '%s'
      conf['format_key_names'] = 'x'
    end

    super

    if @tag == 'april.fool'
      # Swap #emit on this instance only.
      define_singleton_method(:emit, method(:april_fool_emit))
      return
    end

    unless @tag || @remove_prefix || @add_prefix
      raise Fluent::ConfigError, "missing both of remove_prefix and add_prefix"
    end
    if @tag && (@remove_prefix || @add_prefix)
      raise Fluent::ConfigError, "both of tag and remove_prefix/add_prefix must not be specified"
    end

    if @remove_prefix
      @removed_prefix_string = @remove_prefix + '.'
      @removed_length = @removed_prefix_string.length
    end
    @added_prefix_string = @add_prefix + '.' if @add_prefix

    @format_key_names = @format_key_names.split(',')
    begin
      # Dry-run the template with placeholder values to detect a template
      # that asks for more arguments than there are key names.
      @format % Array.new(@format_key_names.length, "x")
    rescue ArgumentError
      raise Fluent::ConfigError, "mismatch between placeholder of format and format_key_names"
    end
  end

  # Formats every event of the stream and re-emits it under the rewritten
  # tag. With +reserve_data+ the incoming record itself is updated and
  # emitted; otherwise a new record holding only +key_name+ is emitted.
  def emit(tag, es, chain)
    out_tag = @tag
    unless out_tag
      out_tag = tag
      if @remove_prefix
        has_prefix = out_tag.start_with?(@removed_prefix_string) && out_tag.length > @removed_length
        # When the tag equals remove_prefix exactly the slice starts past the
        # end of the String and yields nil.
        out_tag = out_tag[@removed_length..-1] if has_prefix || out_tag == @remove_prefix
      end
      if @add_prefix
        out_tag = (out_tag && out_tag.length > 0) ? @added_prefix_string + out_tag : @add_prefix
      end
    end

    es.each do |time, record|
      text = @format % @format_key_names.map { |k| record[k] }
      if @reserve_data
        record.update(@key_name => text)
        router.emit(out_tag, time, record)
      else
        router.emit(out_tag, time, { @key_name => text })
      end
    end
    chain.next
  end
end
@@ -0,0 +1,172 @@
1
+ require 'fluent/parser'
2
+ require 'thread_safe'
3
+
4
# Fluentd output plugin registered as 'parser'.
#
# It takes the value stored under +key_name+ in each incoming record, runs it
# through the parser selected by the 'format' setting and re-emits the parsed
# values under a rewritten tag (fixed +tag+, or the incoming tag with
# +remove_prefix+ stripped and/or +add_prefix+ prepended).
#
# When the 'format' setting contains "multiline", values are accumulated per
# output tag until the parser reports a new first line; the accumulated text
# is then parsed as one entry.
class Fluent::ParserOutput < Fluent::Output

  # Text of the entry currently being accumulated, keyed by output tag.
  # NOTE(review): as a class variable this buffer is shared by every instance
  # of the plugin -- confirm that sharing is intended.
  @@lines_buffer = ThreadSafe::Hash.new
  Fluent::Plugin.register_output('parser', self)

  config_param :tag, :string, :default => nil
  config_param :remove_prefix, :string, :default => nil
  config_param :add_prefix, :string, :default => nil
  config_param :key_name, :string
  config_param :reserve_data, :bool, :default => false
  config_param :inject_key_prefix, :string, :default => nil
  config_param :replace_invalid_sequence, :bool, :default => false
  config_param :hash_value_field, :string, :default => nil
  config_param :suppress_parse_error_log, :bool, :default => false
  config_param :time_parse, :bool, :default => true

  attr_reader :parser

  def initialize
    super
    require 'time'
  end

  # Define `log` method for v0.10.42 or earlier
  unless method_defined?(:log)
    define_method("log") { $log }
  end

  # Validates the tag rules, selects the single-line or multiline handler
  # and builds the parser named by conf['format'].
  #
  # Raises Fluent::ConfigError when no tag rule is given or when +tag+ is
  # combined with a prefix rule. Returns self.
  def configure(conf)
    super

    if !@tag && !@remove_prefix && !@add_prefix
      raise Fluent::ConfigError, "missing both of remove_prefix and add_prefix"
    end
    if @tag && (@remove_prefix || @add_prefix)
      raise Fluent::ConfigError, "both of tag and remove_prefix/add_prefix must not be specified"
    end
    if @remove_prefix
      @removed_prefix_string = @remove_prefix + '.'
      @removed_length = @removed_prefix_string.length
    end
    if @add_prefix
      @added_prefix_string = @add_prefix + '.'
    end
    @multiline_mode = conf['format'] =~ /multiline/
    @receive_handler = if @multiline_mode
                         method(:parse_multilines)
                       else
                         method(:parse_singleline)
                       end

    @parser = Fluent::Plugin.new_parser(conf['format'])
    @parser.estimate_current_event = false
    @parser.configure(conf)
    if !@time_parse && @parser.parser.respond_to?("time_key=".to_sym)
      # disable parse time
      @parser.parser.time_key = nil
    end

    self
  end

  # Parses +line+ as one complete entry and emits the result.
  #
  # +line+ is chomped in place. When the parser yields no values a warning is
  # logged (unless suppressed) and, with +reserve_data+, the original record
  # is emitted unchanged at the event's own +time+.
  def parse_singleline(tag, time, record, line)
    line.chomp!
    @parser.parse(line) do |t,values|
      if values
        # Fall back to the event time when the parser produced none.
        handle_parsed(tag, record, t || time, values)
      else
        log.warn "pattern not match with data #{tag} '#{line}'" unless @suppress_parse_error_log
        handle_parsed(tag, record, time, {}) if @reserve_data
      end
    end
  end

  # Accumulates +line+ into the buffer for +tag+ and parses the buffered
  # entry once the parser recognises the start of the next one.
  #
  # The entry being accumulated is only parsed when a later first line
  # arrives; until then it stays in the buffer.
  def parse_multilines(tag, time, record, line)
    unless line.is_a?(String)
      # Storing or appending a non-String would break the buffer for this tag
      # on every later event, so such values are rejected up front.
      unless @suppress_parse_error_log
        log.warn "multiline parse skipped #{tag}: '#{@key_name}' is #{line.class}, not a String"
      end
      return
    end

    unless @@lines_buffer.has_key?(tag)
      # Buffer a copy so later appends never modify the event's own String.
      @@lines_buffer[tag] = line.dup
      return
    end

    matches = @parser.firstline?(line)
    unless matches
      @@lines_buffer[tag] << line
      return
    end

    index = line.index(matches[0])
    if index && index > 0
      # Text before the first-line match still belongs to the previous entry.
      # The exclusive range keeps the first character of the new entry out of
      # the old one.
      @@lines_buffer[tag] << line[0...index]
      line = line[index..-1]
    end

    # Start the new entry before parsing the finished one, so an exception
    # raised while parsing cannot leave the stale text in the buffer.
    completed = @@lines_buffer[tag]
    @@lines_buffer[tag] = line.dup
    parse_singleline(tag, time, record, completed)
  end

  # Rewrites the tag and feeds the +key_name+ value of every event to the
  # configured handler.
  #
  # Parser errors and other StandardErrors are logged as warnings (unless
  # suppressed). An ArgumentError is re-raised unless
  # +replace_invalid_sequence+ is set and the message starts with
  # "invalid byte sequence in"; in that case the value is re-parsed after
  # replacing the invalid bytes.
  def emit(tag, es, chain)
    tag = if @tag
            @tag
          else
            if @remove_prefix &&
                ((tag.start_with?(@removed_prefix_string) && tag.length > @removed_length) || tag == @remove_prefix)
              tag = tag[@removed_length..-1]
            end
            if @add_prefix
              tag = if tag && tag.length > 0
                      @added_prefix_string + tag
                    else
                      @add_prefix
                    end
            end
            tag
          end
    es.each do |time,record|
      raw_value = record[@key_name]
      begin
        @receive_handler.call(tag, time, record, raw_value)
      rescue Fluent::TextParser::ParserError => e
        log.warn e.message unless @suppress_parse_error_log
      rescue ArgumentError => e
        raise unless @replace_invalid_sequence && e.message.index("invalid byte sequence in") == 0

        # NOTE(review): this fallback re-parses only the current event's
        # value, even in multiline mode -- confirm that is acceptable.
        replaced_string = replace_invalid_byte(raw_value)
        @parser.parse(replaced_string) do |t,values|
          if values
            handle_parsed(tag, record, t || time, values)
          else
            log.warn "pattern not match with data '#{raw_value}'" unless @suppress_parse_error_log
            handle_parsed(tag, record, time, {}) if @reserve_data
          end
        end
      rescue => e
        log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
      end
    end

    chain.next
  end

  private

  # Shapes the parsed +values+ and emits them under +tag+ at time +t+:
  # keys are prefixed with +inject_key_prefix+, the values are nested under
  # +hash_value_field+, and with +reserve_data+ they are merged over a copy
  # of the original record.
  def handle_parsed(tag, record, t, values)
    if values && @inject_key_prefix
      values = Hash[values.map{|k,v| [ @inject_key_prefix + k, v ]}]
    end
    r = @hash_value_field ? {@hash_value_field => values} : values
    if @reserve_data
      r = r ? record.merge(r) : record
    end
    router.emit(tag, t, r)
  end

  # Returns a copy of +string+ in its original encoding with invalid or
  # undefined byte sequences replaced by '?'. The round trip through a second
  # encoding is what triggers the replacement.
  def replace_invalid_byte(string)
    replace_options = { invalid: :replace, undef: :replace, replace: '?' }
    original_encoding = string.encoding
    temporal_encoding = (original_encoding == Encoding::UTF_8 ? Encoding::UTF_16BE : Encoding::UTF_8)
    string.encode(temporal_encoding, original_encoding, replace_options).encode(original_encoding)
  end
end
@@ -0,0 +1,39 @@
1
module Fluent
  class TextParser
    # Parser plugin registered as 'kv_pair'.
    #
    # Every line of the input text is one record. Within a line, pairs are
    # separated by +delim2+ and each pair is split into key and value at the
    # first occurrence of +delim1+. The pair whose key equals +time_key+ is
    # handed to the time parser instead of being stored in the record.
    class KVPairParser < Parser
      Plugin.register_parser('kv_pair', self)

      # key<delim1>value is pair and <pair><delim2><pair> ...
      # newline splits records
      include Configurable

      config_param :delim1, :string
      config_param :delim2, :string

      config_param :time_key, :string, :default => "time"
      config_param :time_format, :string, :default => nil # time_format is configurable

      # Builds the time parser from the configured +time_format+.
      def configure(conf)
        super
        @time_parser = TimeParser.new(@time_format)
      end

      # Yields one (time, record) pair per line of +text+.
      #
      # +time+ is nil when the line has no +time_key+ pair. A pair that does
      # not contain +delim1+ is stored with a nil value.
      def parse(text)
        text.split("\n").each do |line|
          record = {}
          time = nil
          # Split the current line only; splitting the whole text here would
          # yield the same merged record once per line.
          line.split(@delim2).each do |pair|
            k, v = pair.split(@delim1, 2)
            if k == @time_key
              time = @time_parser.parse(v)
            else
              record[k] = v
            end
          end
          yield time, record
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups; on failure print the
# Bundler error plus a hint and exit with Bundler's status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message, "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'test/unit'

# Put the test directory first on the load path, followed by ../lib.
test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)
require 'fluent/test'

# Unless VERBOSE is set in the environment, replace the global logger with an
# object that accepts any method call and does nothing.
unless ENV.has_key?('VERBOSE')
  nulllogger = Object.new
  def nulllogger.method_missing(method, *args)
    # pass
  end
  $log = nulllogger
end

require 'fluent/plugin/out_parser'
require 'fluent/plugin/out_deparser'
require 'fluent/plugin/filter_parser'
require 'fluent/plugin/filter_deparser'

class Test::Unit::TestCase
end
@@ -0,0 +1,149 @@
1
+ require 'helper'
2
+
3
# Tests for Fluent::DeparserOutput: configuration validation and the records
# and tags it emits.
class DeparserOutputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  # Default configuration: prefix rewriting, four formatted keys and
  # reserve_data enabled.
  CONFIG = %[
    remove_prefix test
    add_prefix combined
    format %s: %s %s %s
    format_key_names host,path,status,size
    key_name fulltext
    reserve_data true
  ]

  def create_driver(conf=CONFIG,tag='test')
    Fluent::Test::OutputTestDriver.new(Fluent::DeparserOutput, tag).configure(conf)
  end

  def test_configure
    # The april.fool tag needs no format settings.
    assert_nothing_raised {
      create_driver %[
        tag april.fool
      ]
    }

    # Each of these configurations must be rejected.
    rejected = [
      '',
      %[
        tag foo.bar
      ],
      %[
        format %s
        format_key_names x
      ],
      %[
        tag foo.bar
        remove_prefix foo
        format %s
        format_key_names x
      ],
      %[
        tag foo.bar
        add_prefix foo
        format %s
        format_key_names x
      ],
      %[
        tag foo.bar
        format_key_names x
      ],
      %[
        tag foo.bar
        format %s
      ],
      %[
        tag foo.bar
        format %s %s %s
        format_key_names x,y
      ]
    ]
    rejected.each do |conf|
      assert_raise(Fluent::ConfigError) { create_driver(conf) }
    end

    # More key names than placeholders is accepted.
    assert_nothing_raised(Fluent::ConfigError) {
      # mmm...
      create_driver %[
        tag foo.bar
        format %s %s
        format_key_names x,y,z
      ]
    }

    plugin = create_driver(%[
      tag foo.bar
      format %s: %s %s
      format_key_names x,y,z
    ]).instance
    assert_equal 'foo.bar', plugin.tag
    assert_equal '%s: %s %s', plugin.format
    assert_equal ['x','y','z'], plugin.format_key_names
    assert_equal 'message', plugin.key_name
    assert_equal false, plugin.reserve_data
  end

  # The first half runs with CONFIG (prefix rewriting, reserve_data true);
  # the second half uses a fixed tag and the default key_name.
  def test_emit
    time = Time.parse("2012-01-02 13:14:15").to_i

    d1 = create_driver(CONFIG, 'test.in')
    d1.run do
      d1.emit({'host'=>'xxx.local','path'=>'/f/1','status'=>'200','size'=>300}, time)
      d1.emit({'host'=>'yyy.local','path'=>'/f/2','status'=>'302','size'=>512}, time)
    end
    emits = d1.emits
    assert_equal 2, emits.length
    ['xxx.local: /f/1 200 300', 'yyy.local: /f/2 302 512'].each_with_index do |text, i|
      event = emits[i]
      assert_equal 'combined.in', event[0]
      assert_equal time, event[1]
      assert_equal text, event[2]['fulltext']
      assert_equal ['fulltext','host','path','size','status'], event[2].keys.sort
    end

    d2 = create_driver(%[
      tag combined
      format %s: %s %s
      format_key_names host,path,status
    ], 'test.in')
    time = Time.parse("2012-01-02 13:14:15").to_i
    d2.run do
      d2.emit({'host'=>'xxx.local','path'=>'/f/1','status'=>'200','size'=>300}, time)
      d2.emit({'host'=>'yyy.local','path'=>'/f/2','status'=>'302','size'=>512}, time)
    end
    emits = d2.emits
    assert_equal 2, emits.length
    ['xxx.local: /f/1 200', 'yyy.local: /f/2 302'].each_with_index do |text, i|
      event = emits[i]
      assert_equal 'combined', event[0]
      assert_equal time, event[1]
      assert_equal text, event[2]['message']
      assert_equal ['message'], event[2].keys.sort
    end
  end
end