fluent-plugin-parser 0.3.4 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 11c8d0bf5065c2ea089887da434f380c6477797c
4
- data.tar.gz: 05dd0a148aed3f8020eb839fd5042298464296ed
3
+ metadata.gz: 1cfb579022231c0bf26eb16a1156a7e9e5aa24b8
4
+ data.tar.gz: 6b096b93cce0ddf6995e92058e44a46c0ac59b6f
5
5
  SHA512:
6
- metadata.gz: 4bffef34b921fe845581381aa759fdeea68bf40ea7b95031f7e8909bd5516828f9b31964b550048bd1507e2dd337394e3c2f51ca38afd814c37c17c610353d40
7
- data.tar.gz: 3471c464f75070f30e4ce2bd97e6bb5a10662e779d7cce00b4d0f41f2efdf6d5ba60c77f36a73ad46a81aa48f17ea10ff0e3f6527f9e1f82b7872ebb47fb1fbf
6
+ metadata.gz: 9f59e117075d751a7a734b8e4ad15e9ef1e1a0bce142285c4f6e7b534d235009ee207d0918219ec5a3fbfdcb928280abc893957587ff5d8059d519cf0f016469
7
+ data.tar.gz: e171adea68763ffd821fc34d7ef27298122c2155e8238d461aa563e3cf3395a97c8b18ae5a125e9368222d3c9fd08ac9f1e63bea67063ad6d49f194f00065e9e
data/README.md CHANGED
@@ -34,6 +34,9 @@ Of course, you can use predefined format 'apache' and 'syslog':
34
34
  key_name message
35
35
  </match>
36
36
 
37
+ `fluent-plugin-parser` uses parser plugins of Fluentd (and your own customized parser plugin).
38
+ See the documentation page for more details: http://docs.fluentd.org/articles/parser-plugin-overview
39
+
37
40
  If you want original attribute-data pair in re-emitted message, specify 'reserve_data':
38
41
 
39
42
  <match raw.apache.*>
@@ -44,30 +47,6 @@ If you want original attribute-data pair in re-emitted message, specify 'reserve
44
47
  reserve_data yes
45
48
  </match>
46
49
 
47
- Format 'json', 'csv' and 'tsv' is also supported:
48
-
49
- <match raw.sales.*>
50
- type parser
51
- tag sales
52
- format json
53
- key_name sales
54
- </match>
55
-
56
- Format 'ltsv'(Labeled-TSV (Tab separated values)) is also supported:
57
-
58
- <match raw.sales.*>
59
- type parser
60
- tag sales
61
- format ltsv
62
- key_name sales
63
- </match>
64
-
65
- 'LTSV' is a format like the one below; unlike JSON, it is easy to write with a simple formatter (e.g. LogFormat of Apache):
66
-
67
- KEY1:VALUE1 [TAB] KEY2:VALUE2 [TAB] ...
68
-
69
- About LTSV, see: http://ltsv.org/
70
-
71
50
  If you want to suppress the 'pattern not match' log, specify 'suppress\_parse\_error\_log true' in the configuration.
72
51
  default value is false.
73
52
 
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |gem|
3
3
  gem.name = "fluent-plugin-parser"
4
- gem.version = "0.3.4"
4
+ gem.version = "0.4.0"
5
5
  gem.authors = ["TAGOMORI Satoshi"]
6
6
  gem.email = ["tagomoris@gmail.com"]
7
7
  gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
@@ -15,5 +15,5 @@ Gem::Specification.new do |gem|
15
15
  gem.require_paths = ["lib"]
16
16
 
17
17
  gem.add_development_dependency "rake"
18
- gem.add_runtime_dependency "fluentd"
18
+ gem.add_runtime_dependency "fluentd", ">= 0.10.54"
19
19
  end
@@ -1,4 +1,4 @@
1
- require_relative './fixed_parser'
1
+ require 'fluent/parser'
2
2
 
3
3
  class Fluent::ParserOutput < Fluent::Output
4
4
  Fluent::Plugin.register_output('parser', self)
@@ -11,6 +11,8 @@ class Fluent::ParserOutput < Fluent::Output
11
11
  config_param :inject_key_prefix, :string, :default => nil
12
12
  config_param :replace_invalid_sequence, :bool, :default => false
13
13
  config_param :hash_value_field, :string, :default => nil
14
+ config_param :suppress_parse_error_log, :bool, :default => false
15
+ config_param :time_parse, :bool, :default => true
14
16
 
15
17
  attr_reader :parser
16
18
 
@@ -19,11 +21,6 @@ class Fluent::ParserOutput < Fluent::Output
19
21
  require 'time'
20
22
  end
21
23
 
22
- # Define `log` method for v0.10.42 or earlier
23
- unless method_defined?(:log)
24
- define_method("log") { $log }
25
- end
26
-
27
24
  def configure(conf)
28
25
  super
29
26
 
@@ -41,8 +38,15 @@ class Fluent::ParserOutput < Fluent::Output
41
38
  @added_prefix_string = @add_prefix + '.'
42
39
  end
43
40
 
44
- @parser = FluentExt::TextParser.new(log())
41
+ @parser = Fluent::TextParser.new
42
+ @parser.estimate_current_event = false
45
43
  @parser.configure(conf)
44
+ if !@time_parse && @parser.parser.respond_to?("time_key=".to_sym)
45
+ # disable parse time
46
+ @parser.parser.time_key = nil
47
+ end
48
+
49
+ self
46
50
  end
47
51
 
48
52
  def emit(tag, es, chain)
@@ -64,18 +68,28 @@ class Fluent::ParserOutput < Fluent::Output
64
68
  end
65
69
  es.each do |time,record|
66
70
  raw_value = record[@key_name]
67
- t,values = raw_value ? parse(raw_value) : [nil, nil]
68
- t ||= time
69
-
70
- if values && @inject_key_prefix
71
- values = Hash[values.map{|k,v| [ @inject_key_prefix + k, v ]}]
72
- end
73
- r = @hash_value_field ? {@hash_value_field => values} : values
74
- if @reserve_data
75
- r = r ? record.merge(r) : record
76
- end
77
- if r
78
- Fluent::Engine.emit(tag, t, r)
71
+ begin
72
+ @parser.parse(raw_value) do |t,values|
73
+ t ||= time
74
+ handle_parsed(tag, record, t, values)
75
+ end
76
+ rescue Fluent::TextParser::ParserError => e
77
+ log.warn e.message unless @suppress_parse_error_log
78
+ rescue ArgumentError => e
79
+ if @replace_invalid_sequence
80
+ unless e.message.index("invalid byte sequence in") == 0
81
+ raise
82
+ end
83
+ replaced_string = replace_invalid_byte(raw_value)
84
+ @parser.parse(replaced_string) do |t,values|
85
+ t ||= time
86
+ handle_parsed(tag, record, t, values)
87
+ end
88
+ else
89
+ raise
90
+ end
91
+ rescue => e
92
+ log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
79
93
  end
80
94
  end
81
95
 
@@ -84,17 +98,18 @@ class Fluent::ParserOutput < Fluent::Output
84
98
 
85
99
  private
86
100
 
87
- def parse(string)
88
- return @parser.parse(string) unless @replace_invalid_sequence
89
-
90
- begin
91
- @parser.parse(string)
92
- rescue ArgumentError => e
93
- unless e.message.index("invalid byte sequence in") == 0
94
- raise
95
- end
96
- replaced_string = replace_invalid_byte(string)
97
- @parser.parse(replaced_string)
101
# Post-processes one parse result and re-emits it.
#
# tag    - tag passed on to Fluent::Engine.emit
# record - the original input record; merged with the parsed values when
#          @reserve_data is set, and the source of the raw text quoted in
#          the warning below
# t      - event time passed on to Fluent::Engine.emit
# values - Hash of parsed key/value pairs, or nil when nothing was parsed
#
# Parsed keys are prefixed with @inject_key_prefix when it is configured,
# and the parsed Hash is nested under @hash_value_field when that is
# configured. When no record results, a "pattern not match" warning is
# logged unless @suppress_parse_error_log is set.
def handle_parsed(tag, record, t, values)
  if values && @inject_key_prefix
    values = Hash[values.map{|k,v| [ @inject_key_prefix + k, v ]}]
  end
  r = @hash_value_field ? {@hash_value_field => values} : values
  if @reserve_data
    r = r ? record.merge(r) : record
  end
  if r
    Fluent::Engine.emit(tag, t, r)
  else
    # The raw text is a local variable of the caller and is not visible in
    # this method, so read it back from the record by its configured key.
    log.warn "pattern not match #{record[@key_name]}" unless @suppress_parse_error_log
  end
end
100
115
 
@@ -0,0 +1,39 @@
1
module Fluent
  class TextParser
    # Sample custom parser, registered below as format "kv_pair".
    #
    # Input text is a sequence of records separated by newlines. Each record
    # is a list of pairs separated by delim2, and each pair is
    # key<delim1>value. The pair whose key equals time_key is parsed with
    # time_format and becomes the event time instead of a record field.
    class KVPairParser
      # key<delim1>value is pair and <pair><delim2><pair> ...
      # newline splits records
      include Configurable

      config_param :delim1, :string
      config_param :delim2, :string

      config_param :time_key, :string, :default => "time"
      config_param :time_format, :string, :default => nil # time_format is configurable

      # Builds the time parser from the configured time_format.
      def configure(conf)
        super
        @time_parser = TimeParser.new(@time_format)
      end

      # Parses text and yields (time, record) once per line.
      #
      # time is nil when the line has no pair keyed by time_key; record is a
      # Hash of the remaining key/value strings.
      def call(text)
        text.split("\n").each do |line|
          # Split the current line, not the whole text: otherwise every line
          # would yield the same record built from the complete input.
          pairs = line.split(@delim2)
          record = {}
          time = nil
          pairs.each do |pair|
            k, v = pair.split(@delim1, 2)
            if k == @time_key
              time = @time_parser.parse(v)
            else
              record[k] = v
            end
          end
          yield time, record
        end
      end
    end

    register_template("kv_pair", Proc.new { KVPairParser.new })
  end
end
@@ -304,6 +304,26 @@ class ParserOutputTest < Test::Unit::TestCase
304
304
  assert_equal "xxx:first\tyyy:second", first[2]['data']
305
305
  assert_equal 'first', second[2]['xxx']
306
306
  assert_equal 'second2', second[2]['yyy']
307
+
308
+ # convert types
309
+ d = create_driver(CONFIG_LTSV + %[
310
+ types i:integer,s:string,f:float,b:bool
311
+ ], 'foo.baz.test')
312
+ time = Time.parse("2012-04-02 18:20:59").to_i
313
+ d.run do
314
+ d.emit({'data' => "i:1\ts:2\tf:3\tb:true\tx:123"}, time)
315
+ end
316
+ emits = d.emits
317
+ assert_equal 1, emits.length
318
+
319
+ first = emits[0]
320
+ assert_equal 'foo.bar.test', first[0]
321
+ assert_equal time, first[1]
322
+ assert_equal 1, first[2]['i']
323
+ assert_equal '2', first[2]['s']
324
+ assert_equal 3.0, first[2]['f']
325
+ assert_equal true, first[2]['b']
326
+ assert_equal '123', first[2]['x']
307
327
  end
308
328
 
309
329
  CONFIG_TSV = %[
@@ -473,8 +493,6 @@ class ParserOutputTest < Test::Unit::TestCase
473
493
  t = Time.now.to_i
474
494
  d = create_driver(CONFIG_DONT_PARSE_TIME, 'test.in')
475
495
 
476
- assert_equal false, d.instance.instance_eval{ @parser }.instance_eval{ @parser }.time_parse
477
-
478
496
  d.run do
479
497
  d.emit({'data' => '{"time":1383190430, "f1":"v1"}'}, t)
480
498
  d.emit({'data' => '{"time":"1383190430", "f1":"v1"}'}, t)
@@ -515,21 +533,14 @@ class ParserOutputTest < Test::Unit::TestCase
515
533
  end
516
534
  }
517
535
  emits = d.emits
518
- assert_equal 2, emits.length
536
+ assert_equal 1, emits.length
519
537
 
520
538
  assert_equal 'in', emits[0][0]
521
- assert_equal t, emits[0][1]
539
+ assert_equal 0, emits[0][1]
522
540
  assert_equal 'v1', emits[0][2]['f1']
523
- assert_equal [], emits[0][2]['time']
524
-
525
- assert_equal 'in', emits[1][0]
526
- assert_equal t, emits[1][1]
527
- assert_equal 'v1', emits[1][2]['f1']
528
- assert_equal 'thisisnottime', emits[1][2]['time']
541
+ assert_equal 0, emits[0][2]['time'].to_i
529
542
  end
530
543
 
531
-
532
- #TODO: apache2
533
544
  # REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
534
545
 
535
546
  CONFIG_NOT_REPLACE = %[
@@ -619,26 +630,27 @@ class ParserOutputTest < Test::Unit::TestCase
619
630
 
620
631
# Runs the given block while instance.log is replaced by a fresh
# DummyLogger, then puts the original logger back.
#
# instance - object exposing `log` and `log=` (the plugin under test)
#
# Raises RuntimeError ("use with block") when called without a block.
# Returns the restored (original) logger, as before.
def swap_logger(instance)
  raise "use with block" unless block_given?
  saved_logger = instance.log
  instance.log = DummyLogger.new

  begin
    yield
  ensure
    # Restore even when the block raises (e.g. a failed assertion), so the
    # dummy logger never leaks into subsequent tests.
    instance.log = saved_logger
  end

  saved_logger
end
641
642
 
643
# A record whose time field cannot be parsed must surface as a warning on
# the plugin's logger. The test expects the swapped-in dummy logger to
# signal that warning by raising DummyLoggerWarnedException.
def test_parser_error_warning
  driver = create_driver(CONFIG_INVALID_TIME_VALUE, 'test.in')
  swap_logger(driver.instance) do
    assert_raise(DummyLoggerWarnedException) do
      driver.run { driver.emit({'data' => '{"time":[], "f1":"v1"}'}, Time.now.to_i) }
    end
  end
end
653
+
642
654
  def test_suppress_parse_error_log
643
655
  # default(disabled) 'suppress_parse_error_log' is not specify
644
656
  d = create_driver(CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
@@ -0,0 +1,285 @@
1
+ require 'helper'
2
+ require_relative '../custom_parser'
3
+
4
+ class ParserOutputParsersTest < Test::Unit::TestCase
5
+ def setup
6
+ Fluent::Test.setup
7
+ end
8
+
9
+ def create_driver(conf, tag)
10
+ Fluent::Test::OutputTestDriver.new(Fluent::ParserOutput, tag).configure(conf)
11
+ end
12
+
13
+ def test_regexp_parser
14
+ # exists in test_out_parser
15
+ end
16
+
17
+ def test_json_parser
18
+ # exists in test_out_parser
19
+ end
20
+
21
+ def test_tsv_parser
22
+ # exists in test_out_parser
23
+ end
24
+
25
+ def test_ltsv_parser
26
+ # exists in test_out_parser
27
+ end
28
+
29
+ def test_csv_parser
30
+ # exists in test_out_parser
31
+ end
32
+
33
# With "format none" every message is re-emitted unchanged under the
# rewritten tag, keeping the time of the incoming event.
def test_none_parser
  driver = create_driver(<<EOF, 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format none
EOF
  event_time = Time.parse("2014-11-05 15:59:30").to_i
  messages = (1..4).map { |i| "aaaa bbbb cccc #{i}" }
  driver.run do
    messages.each { |msg| driver.emit({"message" => msg}, event_time) }
  end

  emitted = driver.emits
  assert_equal 4, emitted.length

  messages.each_with_index do |msg, i|
    tag, emitted_time, record = emitted[i]
    assert_equal 'parsed.in', tag
    assert_equal event_time, emitted_time
    assert_equal msg, record['message']
  end
end
67
+
68
# "format apache" must parse access-log lines both without and with the
# trailing referer/agent fields, and take the event time from the log line.
def test_apache_parser
  without_referer = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'
  with_referer = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"'
  log_time = Time.parse("2000-10-10 13:55:36 -0700").to_i

  driver = create_driver(<<EOF, 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format apache
EOF
  event_time = Time.parse("2014-11-05 15:59:30").to_i
  driver.run do
    [without_referer, with_referer].each do |line|
      driver.emit({"message" => line}, event_time)
    end
  end

  emitted = driver.emits
  assert_equal 2, emitted.length

  # fields that both log lines have in common
  shared_fields = [
    ['host', '127.0.0.1'],
    ['user', 'frank'],
    ['method', 'GET'],
    ['path', '/apache_pb.gif'],
    ['code', '200'],
    ['size', '2326'],
  ]

  tag, parsed_time, record = emitted[0]
  assert_equal 'parsed.in', tag
  assert_equal log_time, parsed_time
  shared_fields.each { |key, expected| assert_equal expected, record[key] }
  assert_nil record['referer']
  assert_nil record['agent']

  tag, parsed_time, record = emitted[1]
  assert_equal 'parsed.in', tag
  assert_equal log_time, parsed_time
  shared_fields.each { |key, expected| assert_equal expected, record[key] }
  assert_equal 'http://www.example.com/start.html', record['referer']
  assert_equal 'Mozilla/4.08 [en] (Win98; I ;Nav)', record['agent']
end
110
+
111
# "format apache" combined with a "types" declaration must convert the
# listed fields (code, size) to integers and leave the others as strings.
def test_apache_parser_with_types
  line = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"'
  log_time = Time.parse("2000-10-10 13:55:36 -0700").to_i

  driver = create_driver(<<EOF, 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format apache
types code:integer,size:integer
EOF
  event_time = Time.parse("2014-11-05 15:59:30").to_i
  driver.run { driver.emit({"message" => line}, event_time) }

  emitted = driver.emits
  assert_equal 1, emitted.length

  tag, parsed_time, record = emitted[0]
  assert_equal 'parsed.in', tag
  assert_equal log_time, parsed_time

  expected_fields = [
    ['host', '127.0.0.1'],
    ['user', 'frank'],
    ['method', 'GET'],
    ['path', '/apache_pb.gif'],
    ['code', 200],
    ['size', 2326],
    ['referer', 'http://www.example.com/start.html'],
    ['agent', 'Mozilla/4.08 [en] (Win98; I ;Nav)'],
  ]
  expected_fields.each { |key, expected| assert_equal expected, record[key] }
end
141
+
142
# "format syslog" must split each line into host/ident/pid/message and take
# the event time from the timestamp at the start of the line.
def test_syslog_parser
  loglines = <<LOGS
Nov 5 16:19:48 myhost.local netbiosd[50]: name servers down?
Nov 5 16:21:20 myhost.local coreaudiod[320]: Disabled automatic stack shots because audio IO is active
Nov 5 16:21:20 myhost.local coreaudiod[320]: Enabled automatic stack shots because audio IO is inactive
LOGS
  logs = loglines.split("\n").reject(&:empty?)

  d = create_driver(<<EOF, 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format syslog
EOF
  time = Time.parse("2014-11-05 15:59:30").to_i
  d.run do
    d.emit({"message" => logs[0]}, time)
    d.emit({"message" => logs[1]}, time)
    d.emit({"message" => logs[2]}, time)
  end

  emits = d.emits
  assert_equal 3, emits.length

  # The syslog timestamps above carry no year, so the parsed time falls in
  # the year the test is run. Build the expectations from the current year
  # instead of a fixed one, otherwise the test only passes in 2014.
  this_year = Time.now.year

  e = emits[0]
  assert_equal 'parsed.in', e[0]
  assert_equal Time.parse("#{this_year}-11-05 16:19:48").to_i, e[1]
  r = e[2]
  assert_equal 'myhost.local', r['host']
  assert_equal 'netbiosd', r['ident']
  assert_equal '50', r['pid']
  assert_equal 'name servers down?', r['message']

  e = emits[1]
  assert_equal 'parsed.in', e[0]
  assert_equal Time.parse("#{this_year}-11-05 16:21:20").to_i, e[1]
  r = e[2]
  assert_equal 'myhost.local', r['host']
  assert_equal 'coreaudiod', r['ident']
  assert_equal '320', r['pid']
  assert_equal 'Disabled automatic stack shots because audio IO is active', r['message']

  e = emits[2]
  assert_equal 'parsed.in', e[0]
  assert_equal Time.parse("#{this_year}-11-05 16:21:20").to_i, e[1]
  r = e[2]
  assert_equal 'myhost.local', r['host']
  assert_equal 'coreaudiod', r['ident']
  assert_equal '320', r['pid']
  assert_equal 'Enabled automatic stack shots because audio IO is inactive', r['message']
end
193
+
194
# Draft test for "format multiline": three multi-line messages are emitted
# and two parsed records are expected. The "x_" prefix keeps the name from
# matching the test_* naming used by the other tests, so this is presumably
# never run by the test framework.
def x_test_multiline_parser
  # I can't configure this format well...
  log1 = <<LOG
*** 2014/11/05 16:33:01 -0700
host: myhost
port: 2048
message: first line
LOG
  log2 = <<LOG
*** 2014/11/05 16:33:02 +0900
host: myhost
port: 2049
message: second line
LOG
  log3 = <<LOG
*** 2014/11/05 16:43:11 +1100
LOG
  # NOTE(review): format1 below requires a "/" after the day
  # ("\d\d/\d\d/ \d\d"), but the sample logs have "2014/11/05 16:33:01" with
  # no slash there - looks like a typo; confirm before enabling this test.
  # NOTE(review): format4 captures "[^ ]*", which stops at the first space,
  # while the assertions expect 'first line' / 'second line' - verify.
  d = create_driver(<<'EOF', 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format multiline
time_format %Y/%m/%d %H:%M:%S %z
format_firstline /^\*\*\* /
format1 /\*\*\* (?<time>\d{4}/\d\d/\d\d/ \d\d:\d\d:\d\d [-+]\d{4})/
format2 /\s*host: (?<host>[^\s]+)/
format3 /\s*port: (?<port>\d+)/
format4 /\s*message: (?<message>[^ ]*)/
EOF
  time = Time.parse("2014-11-05 15:59:30").to_i
  d.run do
    d.emit({"message" => log1}, time)
    d.emit({"message" => log2}, time)
    d.emit({"message" => log3}, time)
  end

  emits = d.emits
  assert_equal 2, emits.length

  e = emits[0]
  assert_equal 'parsed.in', e[0]
  assert_equal Time.parse("2014-11-05 16:33:01 -0700").to_i, e[1]
  r = e[2]
  assert_equal 'myhost', r['host']
  assert_equal '2048', r['port']
  assert_equal 'first line', r['message']

  e = emits[1]
  assert_equal 'parsed.in', e[0]
  assert_equal Time.parse("2014-11-05 16:33:02 +0900").to_i, e[1]
  r = e[2]
  assert_equal 'myhost', r['host']
  assert_equal '2049', r['port']
  assert_equal 'second line', r['message']
end
249
+
250
# Exercises the custom "kv_pair" parser loaded from ../custom_parser:
# a record with a parsable time uses that time, a record without a time
# keeps the incoming event time, and a record with an unparsable time is
# dropped.
def test_custom_parser
  driver = create_driver(<<'EOF', 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format kv_pair
time_format %Y-%m-%d %H:%M:%S %z
delim1 :
delim2 ,
EOF
  event_time = Time.parse("2014-11-05 15:59:30").to_i
  inputs = [
    "k1:v1,k2:v2,k3:1,time:2014-11-05 00:00:00 +0000",
    "k1:v1,k2:v2,k3:2",                          # original time is used
    "k1:v1,k2:v2,k3:3,time:2014-11-05 00:00:00", # time parse error -> not emitted
  ]
  driver.run do
    inputs.each { |raw| driver.emit({"message" => raw}, event_time) }
  end
  emitted = driver.emits
  assert_equal 2, emitted.length

  # [expected event time, expected value of k3] per emitted record
  expectations = [
    [Time.parse("2014-11-05 00:00:00 +0000").to_i, '1'],
    [Time.parse("2014-11-05 15:59:30").to_i, '2'],
  ]
  expectations.each_with_index do |(expected_time, expected_k3), i|
    tag, emitted_time, record = emitted[i]
    assert_equal 'parsed.in', tag
    assert_equal expected_time, emitted_time
    assert_equal 'v1', record['k1']
    assert_equal 'v2', record['k2']
    assert_equal expected_k3, record['k3']
  end
end
285
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-21 00:00:00.000000000 Z
11
+ date: 2014-11-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: 0.10.54
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: 0.10.54
41
41
  description: fluentd plugin to parse single field, or to combine log structure into
42
42
  single field
43
43
  email:
@@ -53,12 +53,13 @@ files:
53
53
  - README.md
54
54
  - Rakefile
55
55
  - fluent-plugin-parser.gemspec
56
- - lib/fluent/plugin/fixed_parser.rb
57
56
  - lib/fluent/plugin/out_deparser.rb
58
57
  - lib/fluent/plugin/out_parser.rb
58
+ - test/custom_parser.rb
59
59
  - test/helper.rb
60
60
  - test/plugin/test_deparser.rb
61
61
  - test/plugin/test_out_parser.rb
62
+ - test/plugin/test_out_parser_for_parsers.rb
62
63
  homepage: https://github.com/tagomoris/fluent-plugin-parser
63
64
  licenses:
64
65
  - APLv2
@@ -84,6 +85,8 @@ signing_key:
84
85
  specification_version: 4
85
86
  summary: plugin to parse/combine fluentd log messages
86
87
  test_files:
88
+ - test/custom_parser.rb
87
89
  - test/helper.rb
88
90
  - test/plugin/test_deparser.rb
89
91
  - test/plugin/test_out_parser.rb
92
+ - test/plugin/test_out_parser_for_parsers.rb
@@ -1,280 +0,0 @@
1
- #
2
- # This module is copied from fluentd/lib/fluent/parser.rb and
3
- # fixed not to overwrite 'time' (reserve nil) when time not found in parsed string.
4
- module FluentExt; end
5
-
6
- class FluentExt::TextParser
7
- class GenericParser
8
- include Fluent::Configurable
9
-
10
- config_param :time_key, :string, :default => 'time'
11
- config_param :time_format, :string, :default => nil
12
- config_param :time_parse, :bool, :default => true
13
-
14
- attr_accessor :log
15
-
16
- def initialize
17
- super
18
-
19
- @cache1_key = nil
20
- @cache1_time = nil
21
- @cache2_key = nil
22
- @cache2_time = nil
23
-
24
- @log = nil
25
- end
26
-
27
- def parse_time(record)
28
- time = nil
29
-
30
- unless @time_parse
31
- return time, record
32
- end
33
-
34
- if value = record.delete(@time_key)
35
- if @cache1_key == value
36
- time = @cache1_time
37
- elsif @cache2_key == value
38
- time = @cache2_time
39
- else
40
- begin
41
- time = if @time_format
42
- Time.strptime(value, @time_format).to_i
43
- else
44
- Time.parse(value).to_i
45
- end
46
- @cache1_key = @cache2_key
47
- @cache1_time = @cache2_time
48
- @cache2_key = value
49
- @cache2_time = time
50
- rescue TypeError, ArgumentError => e
51
- @log.warn "Failed to parse time", :key => @time_key, :value => value
52
- record[@time_key] = value
53
- end
54
- end
55
- end
56
-
57
- return time, record
58
- end
59
- end
60
-
61
- class RegexpParser < GenericParser
62
- include Fluent::Configurable
63
-
64
- config_param :suppress_parse_error_log, :bool, :default => false
65
-
66
- def initialize(regexp, conf={})
67
- super()
68
- @regexp = regexp
69
- unless conf.empty?
70
- configure(conf)
71
- end
72
- end
73
-
74
- def call(text)
75
- m = @regexp.match(text)
76
- unless m
77
- unless @suppress_parse_error_log
78
- @log.warn "pattern not match: #{text}"
79
- end
80
-
81
- return nil, nil
82
- end
83
-
84
- record = {}
85
- m.names.each {|name|
86
- record[name] = m[name] if m[name]
87
- }
88
- parse_time(record)
89
- end
90
- end
91
-
92
- class JSONParser < GenericParser
93
- def call(text)
94
- record = Yajl.load(text)
95
- return parse_time(record)
96
- rescue Yajl::ParseError
97
- unless @suppress_parse_error_log
98
- @log.warn "pattern not match(json): #{text.inspect}: #{$!}"
99
- end
100
-
101
- return nil, nil
102
- end
103
- end
104
-
105
- class LabeledTSVParser < GenericParser
106
- def call(text)
107
- record = Hash[text.split("\t").map{|p| p.split(":", 2)}]
108
- parse_time(record)
109
- end
110
- end
111
-
112
- class ValuesParser < GenericParser
113
- config_param :keys, :string
114
-
115
- def configure(conf)
116
- super
117
- @keys = @keys.split(",")
118
- end
119
-
120
- def values_map(values)
121
- Hash[@keys.zip(values)]
122
- end
123
- end
124
-
125
- class TSVParser < ValuesParser
126
- config_param :delimiter, :string, :default => "\t"
127
-
128
- def call(text)
129
- return parse_time(values_map(text.split(@delimiter)))
130
- end
131
- end
132
-
133
- class CSVParser < ValuesParser
134
- def initialize
135
- super
136
- require 'csv'
137
- end
138
-
139
- def call(text)
140
- return parse_time(values_map(CSV.parse_line(text)))
141
- end
142
- end
143
-
144
- class ApacheParser < GenericParser
145
- include Fluent::Configurable
146
-
147
- REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
148
-
149
- def initialize
150
- super
151
-
152
- @time_key = "time"
153
- @time_format = "%d/%b/%Y:%H:%M:%S %z"
154
- end
155
-
156
- def call(text)
157
- m = REGEXP.match(text)
158
- unless m
159
- unless @suppress_parse_error_log
160
- @log.warn "pattern not match: #{text.inspect}"
161
- end
162
-
163
- return nil, nil
164
- end
165
-
166
- host = m['host']
167
- host = (host == '-') ? nil : host
168
-
169
- user = m['user']
170
- user = (user == '-') ? nil : user
171
-
172
- time = m['time']
173
-
174
- method = m['method']
175
- path = m['path']
176
-
177
- code = m['code'].to_i
178
- code = nil if code == 0
179
-
180
- size = m['size']
181
- size = (size == '-') ? nil : size.to_i
182
-
183
- referer = m['referer']
184
- referer = (referer == '-') ? nil : referer
185
-
186
- agent = m['agent']
187
- agent = (agent == '-') ? nil : agent
188
-
189
- record = {
190
- "time" => time,
191
- "host" => host,
192
- "user" => user,
193
- "method" => method,
194
- "path" => path,
195
- "code" => code,
196
- "size" => size,
197
- "referer" => referer,
198
- "agent" => agent,
199
- }
200
-
201
- parse_time(record)
202
- end
203
- end
204
-
205
- TEMPLATE_FACTORIES = {
206
- 'apache' => Proc.new { RegexpParser.new(/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
207
- 'apache2' => Proc.new { ApacheParser.new },
208
- 'nginx' => Proc.new { RegexpParser.new(/^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
209
- 'syslog' => Proc.new { RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}) },
210
- 'json' => Proc.new { JSONParser.new },
211
- 'csv' => Proc.new { CSVParser.new },
212
- 'tsv' => Proc.new { TSVParser.new },
213
- 'ltsv' => Proc.new { LabeledTSVParser.new },
214
- }
215
-
216
- def self.register_template(name, regexp_or_proc, time_format=nil)
217
-
218
- factory = if regexp_or_proc.is_a?(Regexp)
219
- regexp = regexp_or_proc
220
- Proc.new { RegexpParser.new(regexp, {'time_format'=>time_format}) }
221
- else
222
- Proc.new { proc }
223
- end
224
- TEMPLATE_FACTORIES[name] = factory
225
- end
226
-
227
- attr_accessor :log
228
- attr_reader :parser
229
-
230
- def initialize(logger)
231
- @log = logger
232
- @parser = nil
233
- end
234
-
235
- def configure(conf, required=true)
236
- format = conf['format']
237
-
238
- if format == nil
239
- if required
240
- raise Fluent::ConfigError, "'format' parameter is required"
241
- else
242
- return nil
243
- end
244
- end
245
-
246
- if format[0] == ?/ && format[format.length-1] == ?/
247
- # regexp
248
- begin
249
- regexp = Regexp.new(format[1..-2])
250
- if regexp.named_captures.empty?
251
- raise "No named captures"
252
- end
253
- rescue
254
- raise Fluent::ConfigError, "Invalid regexp '#{format[1..-2]}': #{$!}"
255
- end
256
- @parser = RegexpParser.new(regexp)
257
-
258
- else
259
- # built-in template
260
- factory = TEMPLATE_FACTORIES[format]
261
- unless factory
262
- raise Fluent::ConfigError, "Unknown format template '#{format}'"
263
- end
264
- @parser = factory.call
265
-
266
- end
267
-
268
- @parser.log = @log
269
-
270
- if @parser.respond_to?(:configure)
271
- @parser.configure(conf)
272
- end
273
-
274
- return true
275
- end
276
-
277
- def parse(text)
278
- return @parser.call(text)
279
- end
280
- end