fluent-plugin-parser 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 11c8d0bf5065c2ea089887da434f380c6477797c
4
- data.tar.gz: 05dd0a148aed3f8020eb839fd5042298464296ed
3
+ metadata.gz: 1cfb579022231c0bf26eb16a1156a7e9e5aa24b8
4
+ data.tar.gz: 6b096b93cce0ddf6995e92058e44a46c0ac59b6f
5
5
  SHA512:
6
- metadata.gz: 4bffef34b921fe845581381aa759fdeea68bf40ea7b95031f7e8909bd5516828f9b31964b550048bd1507e2dd337394e3c2f51ca38afd814c37c17c610353d40
7
- data.tar.gz: 3471c464f75070f30e4ce2bd97e6bb5a10662e779d7cce00b4d0f41f2efdf6d5ba60c77f36a73ad46a81aa48f17ea10ff0e3f6527f9e1f82b7872ebb47fb1fbf
6
+ metadata.gz: 9f59e117075d751a7a734b8e4ad15e9ef1e1a0bce142285c4f6e7b534d235009ee207d0918219ec5a3fbfdcb928280abc893957587ff5d8059d519cf0f016469
7
+ data.tar.gz: e171adea68763ffd821fc34d7ef27298122c2155e8238d461aa563e3cf3395a97c8b18ae5a125e9368222d3c9fd08ac9f1e63bea67063ad6d49f194f00065e9e
data/README.md CHANGED
@@ -34,6 +34,9 @@ Of course, you can use predefined format 'apache' and 'syslog':
34
34
  key_name message
35
35
  </match>
36
36
 
37
+ `fluent-plugin-parser` uses Fluentd's parser plugins (including any custom parser plugins of your own).
38
+ See the documentation page for more details: http://docs.fluentd.org/articles/parser-plugin-overview
39
+
37
40
  If you want original attribute-data pair in re-emitted message, specify 'reserve_data':
38
41
 
39
42
  <match raw.apache.*>
@@ -44,30 +47,6 @@ If you want original attribute-data pair in re-emitted message, specify 'reserve
44
47
  reserve_data yes
45
48
  </match>
46
49
 
47
- Format 'json', 'csv' and 'tsv' is also supported:
48
-
49
- <match raw.sales.*>
50
- type parser
51
- tag sales
52
- format json
53
- key_name sales
54
- </match>
55
-
56
- Format 'ltsv'(Labeled-TSV (Tab separated values)) is also supported:
57
-
58
- <match raw.sales.*>
59
- type parser
60
- tag sales
61
- format ltsv
62
- key_name sales
63
- </match>
64
-
65
- 'LTSV' is format like below, unlinke json, easy to write with simple formatter (ex: LogFormat of apache):
66
-
67
- KEY1:VALUE1 [TAB] KEY2:VALUE2 [TAB] ...
68
-
69
- About LTSV, see: http://ltsv.org/
70
-
71
50
  To suppress the 'pattern not match' log, add 'suppress\_parse\_error\_log true' to the configuration.
72
51
  default value is false.
73
52
 
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |gem|
3
3
  gem.name = "fluent-plugin-parser"
4
- gem.version = "0.3.4"
4
+ gem.version = "0.4.0"
5
5
  gem.authors = ["TAGOMORI Satoshi"]
6
6
  gem.email = ["tagomoris@gmail.com"]
7
7
  gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
@@ -15,5 +15,5 @@ Gem::Specification.new do |gem|
15
15
  gem.require_paths = ["lib"]
16
16
 
17
17
  gem.add_development_dependency "rake"
18
- gem.add_runtime_dependency "fluentd"
18
+ gem.add_runtime_dependency "fluentd", ">= 0.10.54"
19
19
  end
@@ -1,4 +1,4 @@
1
- require_relative './fixed_parser'
1
+ require 'fluent/parser'
2
2
 
3
3
  class Fluent::ParserOutput < Fluent::Output
4
4
  Fluent::Plugin.register_output('parser', self)
@@ -11,6 +11,8 @@ class Fluent::ParserOutput < Fluent::Output
11
11
  config_param :inject_key_prefix, :string, :default => nil
12
12
  config_param :replace_invalid_sequence, :bool, :default => false
13
13
  config_param :hash_value_field, :string, :default => nil
14
+ config_param :suppress_parse_error_log, :bool, :default => false
15
+ config_param :time_parse, :bool, :default => true
14
16
 
15
17
  attr_reader :parser
16
18
 
@@ -19,11 +21,6 @@ class Fluent::ParserOutput < Fluent::Output
19
21
  require 'time'
20
22
  end
21
23
 
22
- # Define `log` method for v0.10.42 or earlier
23
- unless method_defined?(:log)
24
- define_method("log") { $log }
25
- end
26
-
27
24
  def configure(conf)
28
25
  super
29
26
 
@@ -41,8 +38,15 @@ class Fluent::ParserOutput < Fluent::Output
41
38
  @added_prefix_string = @add_prefix + '.'
42
39
  end
43
40
 
44
- @parser = FluentExt::TextParser.new(log())
41
+ @parser = Fluent::TextParser.new
42
+ @parser.estimate_current_event = false
45
43
  @parser.configure(conf)
44
+ if !@time_parse && @parser.parser.respond_to?("time_key=".to_sym)
45
+ # disable parse time
46
+ @parser.parser.time_key = nil
47
+ end
48
+
49
+ self
46
50
  end
47
51
 
48
52
  def emit(tag, es, chain)
@@ -64,18 +68,28 @@ class Fluent::ParserOutput < Fluent::Output
64
68
  end
65
69
  es.each do |time,record|
66
70
  raw_value = record[@key_name]
67
- t,values = raw_value ? parse(raw_value) : [nil, nil]
68
- t ||= time
69
-
70
- if values && @inject_key_prefix
71
- values = Hash[values.map{|k,v| [ @inject_key_prefix + k, v ]}]
72
- end
73
- r = @hash_value_field ? {@hash_value_field => values} : values
74
- if @reserve_data
75
- r = r ? record.merge(r) : record
76
- end
77
- if r
78
- Fluent::Engine.emit(tag, t, r)
71
+ begin
72
+ @parser.parse(raw_value) do |t,values|
73
+ t ||= time
74
+ handle_parsed(tag, record, t, values)
75
+ end
76
+ rescue Fluent::TextParser::ParserError => e
77
+ log.warn e.message unless @suppress_parse_error_log
78
+ rescue ArgumentError => e
79
+ if @replace_invalid_sequence
80
+ unless e.message.index("invalid byte sequence in") == 0
81
+ raise
82
+ end
83
+ replaced_string = replace_invalid_byte(raw_value)
84
+ @parser.parse(replaced_string) do |t,values|
85
+ t ||= time
86
+ handle_parsed(tag, record, t, values)
87
+ end
88
+ else
89
+ raise
90
+ end
91
+ rescue => e
92
+ log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
79
93
  end
80
94
  end
81
95
 
@@ -84,17 +98,18 @@ class Fluent::ParserOutput < Fluent::Output
84
98
 
85
99
  private
86
100
 
87
- def parse(string)
88
- return @parser.parse(string) unless @replace_invalid_sequence
89
-
90
- begin
91
- @parser.parse(string)
92
- rescue ArgumentError => e
93
- unless e.message.index("invalid byte sequence in") == 0
94
- raise
95
- end
96
- replaced_string = replace_invalid_byte(string)
97
- @parser.parse(replaced_string)
101
# Builds the output record from one parse result and emits it.
#
# tag    - tag to emit the resulting event under
# record - the original input record; merged into the output when
#          @reserve_data is set
# t      - event time to emit with
# values - Hash of parsed key/value pairs, or nil when nothing was parsed
#
# When there is a record to emit it is passed to Fluent::Engine.emit;
# otherwise a "pattern not match" warning is logged unless
# @suppress_parse_error_log is set.
def handle_parsed(tag, record, t, values)
  if values && @inject_key_prefix
    values = Hash[values.map{|k,v| [ @inject_key_prefix + k, v ]}]
  end
  r = @hash_value_field ? {@hash_value_field => values} : values
  if @reserve_data
    r = r ? record.merge(r) : record
  end
  if r
    Fluent::Engine.emit(tag, t, r)
  else
    # The raw input is not a local of this method (it lives in #emit as
    # record[@key_name]), so look it up from the record for the message.
    log.warn "pattern not match #{record[@key_name]}" unless @suppress_parse_error_log
  end
end
100
115
 
@@ -0,0 +1,39 @@
1
module Fluent
  class TextParser
    # Custom parser registered under the format name "kv_pair".
    #
    # Input format: key<delim1>value is a pair, pairs are joined as
    # <pair><delim2><pair>..., and a newline separates records.
    class KVPairParser
      include Configurable

      config_param :delim1, :string
      config_param :delim2, :string

      config_param :time_key, :string, :default => "time"
      config_param :time_format, :string, :default => nil # time_format is configurable

      # Applies the configuration and builds the time parser for @time_format.
      def configure(conf)
        super
        @time_parser = TimeParser.new(@time_format)
      end

      # Parses +text+ and yields (time, record) once per input line.
      #
      # The pair whose key equals @time_key is parsed into the yielded time
      # and is not stored in the record; time is nil when no such pair exists.
      def call(text)
        text.split("\n").each do |line|
          # Split the current line, not the whole text: each line is its own record.
          pairs = line.split(@delim2)
          record = {}
          time = nil
          pairs.each do |pair|
            k, v = pair.split(@delim1, 2)
            if k == @time_key
              time = @time_parser.parse(v)
            else
              record[k] = v
            end
          end
          yield time, record
        end
      end
    end

    register_template("kv_pair", Proc.new { KVPairParser.new })
  end
end
@@ -304,6 +304,26 @@ class ParserOutputTest < Test::Unit::TestCase
304
304
  assert_equal "xxx:first\tyyy:second", first[2]['data']
305
305
  assert_equal 'first', second[2]['xxx']
306
306
  assert_equal 'second2', second[2]['yyy']
307
+
308
+ # convert types
309
+ d = create_driver(CONFIG_LTSV + %[
310
+ types i:integer,s:string,f:float,b:bool
311
+ ], 'foo.baz.test')
312
+ time = Time.parse("2012-04-02 18:20:59").to_i
313
+ d.run do
314
+ d.emit({'data' => "i:1\ts:2\tf:3\tb:true\tx:123"}, time)
315
+ end
316
+ emits = d.emits
317
+ assert_equal 1, emits.length
318
+
319
+ first = emits[0]
320
+ assert_equal 'foo.bar.test', first[0]
321
+ assert_equal time, first[1]
322
+ assert_equal 1, first[2]['i']
323
+ assert_equal '2', first[2]['s']
324
+ assert_equal 3.0, first[2]['f']
325
+ assert_equal true, first[2]['b']
326
+ assert_equal '123', first[2]['x']
307
327
  end
308
328
 
309
329
  CONFIG_TSV = %[
@@ -473,8 +493,6 @@ class ParserOutputTest < Test::Unit::TestCase
473
493
  t = Time.now.to_i
474
494
  d = create_driver(CONFIG_DONT_PARSE_TIME, 'test.in')
475
495
 
476
- assert_equal false, d.instance.instance_eval{ @parser }.instance_eval{ @parser }.time_parse
477
-
478
496
  d.run do
479
497
  d.emit({'data' => '{"time":1383190430, "f1":"v1"}'}, t)
480
498
  d.emit({'data' => '{"time":"1383190430", "f1":"v1"}'}, t)
@@ -515,21 +533,14 @@ class ParserOutputTest < Test::Unit::TestCase
515
533
  end
516
534
  }
517
535
  emits = d.emits
518
- assert_equal 2, emits.length
536
+ assert_equal 1, emits.length
519
537
 
520
538
  assert_equal 'in', emits[0][0]
521
- assert_equal t, emits[0][1]
539
+ assert_equal 0, emits[0][1]
522
540
  assert_equal 'v1', emits[0][2]['f1']
523
- assert_equal [], emits[0][2]['time']
524
-
525
- assert_equal 'in', emits[1][0]
526
- assert_equal t, emits[1][1]
527
- assert_equal 'v1', emits[1][2]['f1']
528
- assert_equal 'thisisnottime', emits[1][2]['time']
541
+ assert_equal 0, emits[0][2]['time'].to_i
529
542
  end
530
543
 
531
-
532
- #TODO: apache2
533
544
  # REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
534
545
 
535
546
  CONFIG_NOT_REPLACE = %[
@@ -619,26 +630,27 @@ class ParserOutputTest < Test::Unit::TestCase
619
630
 
620
631
# Runs the given block with +instance+'s logger replaced by a fresh
# DummyLogger, then puts the previous logger back.
#
# instance - object exposing `log` and `log=`
#
# Raises RuntimeError ("use with block") when called without a block.
# Returns the logger that was restored.
def swap_logger(instance)
  raise "use with block" unless block_given?

  saved_logger = instance.log
  instance.log = DummyLogger.new
  begin
    yield
  ensure
    # Restore even when the block raises, so one failing test cannot leave
    # the dummy logger installed for whatever runs next.
    instance.log = saved_logger
  end
  saved_logger
end
641
642
 
643
# A record whose "time" field is not a usable time value must produce a
# warning. The assertion expects DummyLoggerWarnedException to propagate
# while the dummy logger is installed (presumably raised by DummyLogger on
# warn -- defined outside this view).
def test_parser_error_warning
  driver = create_driver(CONFIG_INVALID_TIME_VALUE, 'test.in')
  payload = {'data' => '{"time":[], "f1":"v1"}'}

  swap_logger(driver.instance) do
    assert_raise(DummyLoggerWarnedException) do
      driver.run { driver.emit(payload, Time.now.to_i) }
    end
  end
end
653
+
642
654
  def test_suppress_parse_error_log
643
655
  # default(disabled) 'suppress_parse_error_log' is not specify
644
656
  d = create_driver(CONFIG_DEFAULT_SUPPRESS_PARSE_ERROR_LOG, 'test.in')
@@ -0,0 +1,285 @@
1
+ require 'helper'
2
+ require_relative '../custom_parser'
3
+
4
# Runs Fluent::ParserOutput against the parser formats selected through the
# `format` parameter: none, apache, syslog, multiline (disabled) and the
# "kv_pair" parser loaded from ../custom_parser.
class ParserOutputParsersTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  # Builds an output test driver for Fluent::ParserOutput configured with
  # +conf+ and fed events tagged +tag+.
  def create_driver(conf, tag)
    Fluent::Test::OutputTestDriver.new(Fluent::ParserOutput, tag).configure(conf)
  end

  def test_regexp_parser
    # exists in test_out_parser
  end

  def test_json_parser
    # exists in test_out_parser
  end

  def test_tsv_parser
    # exists in test_out_parser
  end

  def test_ltsv_parser
    # exists in test_out_parser
  end

  def test_csv_parser
    # exists in test_out_parser
  end

  def test_none_parser
    d = create_driver(<<EOF, 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format none
EOF
    time = Time.parse("2014-11-05 15:59:30").to_i
    messages = (1..4).map { |i| "aaaa bbbb cccc #{i}" }
    d.run do
      messages.each { |message| d.emit({"message" => message}, time) }
    end

    e = d.emits
    assert_equal 4, e.length

    messages.each_with_index do |message, i|
      assert_emit e[i], 'parsed.in', time, 'message' => message
    end
  end

  def test_apache_parser
    log1 = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326'
    log2 = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"'
    log_time = Time.parse("2000-10-10 13:55:36 -0700").to_i

    d = create_driver(<<EOF, 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format apache
EOF
    time = Time.parse("2014-11-05 15:59:30").to_i
    d.run do
      d.emit({"message" => log1}, time)
      d.emit({"message" => log2}, time)
    end

    e = d.emits
    assert_equal 2, e.length

    # Fields shared by both log lines; without `types` every value stays a String.
    common = {
      'host' => '127.0.0.1',
      'user' => 'frank',
      'method' => 'GET',
      'path' => '/apache_pb.gif',
      'code' => '200',
      'size' => '2326',
    }
    assert_emit e[0], 'parsed.in', log_time,
                common.merge('referer' => nil, 'agent' => nil)
    assert_emit e[1], 'parsed.in', log_time,
                common.merge('referer' => 'http://www.example.com/start.html',
                             'agent' => 'Mozilla/4.08 [en] (Win98; I ;Nav)')
  end

  def test_apache_parser_with_types
    log = '127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "http://www.example.com/start.html" "Mozilla/4.08 [en] (Win98; I ;Nav)"'
    log_time = Time.parse("2000-10-10 13:55:36 -0700").to_i

    d = create_driver(<<EOF, 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format apache
types code:integer,size:integer
EOF
    time = Time.parse("2014-11-05 15:59:30").to_i
    d.run do
      d.emit({"message" => log}, time)
    end

    e = d.emits
    assert_equal 1, e.length

    # `types` turns code and size into Integers; everything else stays a String.
    assert_emit e[0], 'parsed.in', log_time,
                'host' => '127.0.0.1',
                'user' => 'frank',
                'method' => 'GET',
                'path' => '/apache_pb.gif',
                'code' => 200,
                'size' => 2326,
                'referer' => 'http://www.example.com/start.html',
                'agent' => 'Mozilla/4.08 [en] (Win98; I ;Nav)'
  end

  def test_syslog_parser
    loglines = <<LOGS
Nov 5 16:19:48 myhost.local netbiosd[50]: name servers down?
Nov 5 16:21:20 myhost.local coreaudiod[320]: Disabled automatic stack shots because audio IO is active
Nov 5 16:21:20 myhost.local coreaudiod[320]: Enabled automatic stack shots because audio IO is inactive
LOGS
    logs = loglines.split("\n").reject(&:empty?)

    d = create_driver(<<EOF, 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format syslog
EOF
    time = Time.parse("2014-11-05 15:59:30").to_i
    d.run do
      logs.each { |line| d.emit({"message" => line}, time) }
    end

    emits = d.emits
    assert_equal 3, emits.length

    # The syslog lines carry no year, so the expected timestamps must not
    # hard-code one. NOTE(review): assumes the parser fills in the current
    # year for a year-less time format -- confirm against Fluentd's TimeParser.
    year = Time.now.year
    expected = [
      ["#{year}-11-05 16:19:48", 'netbiosd', '50', 'name servers down?'],
      ["#{year}-11-05 16:21:20", 'coreaudiod', '320', 'Disabled automatic stack shots because audio IO is active'],
      ["#{year}-11-05 16:21:20", 'coreaudiod', '320', 'Enabled automatic stack shots because audio IO is inactive'],
    ]
    expected.each_with_index do |(timestamp, ident, pid, message), i|
      assert_emit emits[i], 'parsed.in', Time.parse(timestamp).to_i,
                  'host' => 'myhost.local',
                  'ident' => ident,
                  'pid' => pid,
                  'message' => message
    end
  end

  # Disabled (the x_ prefix keeps the test runner from picking it up).
  # The original author could not get this format configured. The patterns
  # below are corrected so they at least agree with the fixtures and the
  # assertions (no stray "/" after the day, message captured to end of line),
  # but the test has not been confirmed to pass -- keep it disabled until it is.
  def x_test_multiline_parser
    log1 = <<LOG
*** 2014/11/05 16:33:01 -0700
host: myhost
port: 2048
message: first line
LOG
    log2 = <<LOG
*** 2014/11/05 16:33:02 +0900
host: myhost
port: 2049
message: second line
LOG
    log3 = <<LOG
*** 2014/11/05 16:43:11 +1100
LOG
    d = create_driver(<<'EOF', 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format multiline
time_format %Y/%m/%d %H:%M:%S %z
format_firstline /^\*\*\* /
format1 /\*\*\* (?<time>\d{4}/\d\d/\d\d \d\d:\d\d:\d\d [-+]\d{4})/
format2 /\s*host: (?<host>[^\s]+)/
format3 /\s*port: (?<port>\d+)/
format4 /\s*message: (?<message>[^\n]*)/
EOF
    time = Time.parse("2014-11-05 15:59:30").to_i
    d.run do
      d.emit({"message" => log1}, time)
      d.emit({"message" => log2}, time)
      d.emit({"message" => log3}, time)
    end

    emits = d.emits
    assert_equal 2, emits.length

    assert_emit emits[0], 'parsed.in', Time.parse("2014-11-05 16:33:01 -0700").to_i,
                'host' => 'myhost', 'port' => '2048', 'message' => 'first line'
    assert_emit emits[1], 'parsed.in', Time.parse("2014-11-05 16:33:02 +0900").to_i,
                'host' => 'myhost', 'port' => '2049', 'message' => 'second line'
  end

  def test_custom_parser
    d = create_driver(<<'EOF', 'test.in')
remove_prefix test
add_prefix parsed
key_name message
format kv_pair
time_format %Y-%m-%d %H:%M:%S %z
delim1 :
delim2 ,
EOF
    time = Time.parse("2014-11-05 15:59:30").to_i
    d.run do
      d.emit({"message" => "k1:v1,k2:v2,k3:1,time:2014-11-05 00:00:00 +0000"}, time)
      d.emit({"message" => "k1:v1,k2:v2,k3:2"}, time) # original time is used
      d.emit({"message" => "k1:v1,k2:v2,k3:3,time:2014-11-05 00:00:00"}, time) # time parse error -> not emitted
    end
    emits = d.emits
    assert_equal 2, emits.length

    # First event: time comes from the parsed "time" pair.
    assert_emit emits[0], 'parsed.in', Time.parse("2014-11-05 00:00:00 +0000").to_i,
                'k1' => 'v1', 'k2' => 'v2', 'k3' => '1'
    # Second event: no "time" pair, so the time passed to emit is kept.
    assert_emit emits[1], 'parsed.in', Time.parse("2014-11-05 15:59:30").to_i,
                'k1' => 'v1', 'k2' => 'v2', 'k3' => '2'
  end

  private

  # Asserts that one emitted event ([tag, time, record]) has the given tag
  # and time, and that its record holds every expected key/value pair.
  # A nil expectation asserts that the key is absent or nil.
  def assert_emit(emit, tag, time, fields)
    assert_equal tag, emit[0]
    assert_equal time, emit[1]
    fields.each do |key, value|
      if value.nil?
        assert_nil emit[2][key]
      else
        assert_equal value, emit[2][key]
      end
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-21 00:00:00.000000000 Z
11
+ date: 2014-11-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: 0.10.54
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: 0.10.54
41
41
  description: fluentd plugin to parse single field, or to combine log structure into
42
42
  single field
43
43
  email:
@@ -53,12 +53,13 @@ files:
53
53
  - README.md
54
54
  - Rakefile
55
55
  - fluent-plugin-parser.gemspec
56
- - lib/fluent/plugin/fixed_parser.rb
57
56
  - lib/fluent/plugin/out_deparser.rb
58
57
  - lib/fluent/plugin/out_parser.rb
58
+ - test/custom_parser.rb
59
59
  - test/helper.rb
60
60
  - test/plugin/test_deparser.rb
61
61
  - test/plugin/test_out_parser.rb
62
+ - test/plugin/test_out_parser_for_parsers.rb
62
63
  homepage: https://github.com/tagomoris/fluent-plugin-parser
63
64
  licenses:
64
65
  - APLv2
@@ -84,6 +85,8 @@ signing_key:
84
85
  specification_version: 4
85
86
  summary: plugin to parse/combine fluentd log messages
86
87
  test_files:
88
+ - test/custom_parser.rb
87
89
  - test/helper.rb
88
90
  - test/plugin/test_deparser.rb
89
91
  - test/plugin/test_out_parser.rb
92
+ - test/plugin/test_out_parser_for_parsers.rb
@@ -1,280 +0,0 @@
1
- #
2
- # This module is copied from fluentd/lib/fluent/parser.rb and
3
- # fixed not to overwrite 'time' (reserve nil) when time not found in parsed string.
4
- module FluentExt; end
5
-
6
- class FluentExt::TextParser
7
- class GenericParser
8
- include Fluent::Configurable
9
-
10
- config_param :time_key, :string, :default => 'time'
11
- config_param :time_format, :string, :default => nil
12
- config_param :time_parse, :bool, :default => true
13
-
14
- attr_accessor :log
15
-
16
- def initialize
17
- super
18
-
19
- @cache1_key = nil
20
- @cache1_time = nil
21
- @cache2_key = nil
22
- @cache2_time = nil
23
-
24
- @log = nil
25
- end
26
-
27
- def parse_time(record)
28
- time = nil
29
-
30
- unless @time_parse
31
- return time, record
32
- end
33
-
34
- if value = record.delete(@time_key)
35
- if @cache1_key == value
36
- time = @cache1_time
37
- elsif @cache2_key == value
38
- time = @cache2_time
39
- else
40
- begin
41
- time = if @time_format
42
- Time.strptime(value, @time_format).to_i
43
- else
44
- Time.parse(value).to_i
45
- end
46
- @cache1_key = @cache2_key
47
- @cache1_time = @cache2_time
48
- @cache2_key = value
49
- @cache2_time = time
50
- rescue TypeError, ArgumentError => e
51
- @log.warn "Failed to parse time", :key => @time_key, :value => value
52
- record[@time_key] = value
53
- end
54
- end
55
- end
56
-
57
- return time, record
58
- end
59
- end
60
-
61
- class RegexpParser < GenericParser
62
- include Fluent::Configurable
63
-
64
- config_param :suppress_parse_error_log, :bool, :default => false
65
-
66
- def initialize(regexp, conf={})
67
- super()
68
- @regexp = regexp
69
- unless conf.empty?
70
- configure(conf)
71
- end
72
- end
73
-
74
- def call(text)
75
- m = @regexp.match(text)
76
- unless m
77
- unless @suppress_parse_error_log
78
- @log.warn "pattern not match: #{text}"
79
- end
80
-
81
- return nil, nil
82
- end
83
-
84
- record = {}
85
- m.names.each {|name|
86
- record[name] = m[name] if m[name]
87
- }
88
- parse_time(record)
89
- end
90
- end
91
-
92
- class JSONParser < GenericParser
93
- def call(text)
94
- record = Yajl.load(text)
95
- return parse_time(record)
96
- rescue Yajl::ParseError
97
- unless @suppress_parse_error_log
98
- @log.warn "pattern not match(json): #{text.inspect}: #{$!}"
99
- end
100
-
101
- return nil, nil
102
- end
103
- end
104
-
105
- class LabeledTSVParser < GenericParser
106
- def call(text)
107
- record = Hash[text.split("\t").map{|p| p.split(":", 2)}]
108
- parse_time(record)
109
- end
110
- end
111
-
112
- class ValuesParser < GenericParser
113
- config_param :keys, :string
114
-
115
- def configure(conf)
116
- super
117
- @keys = @keys.split(",")
118
- end
119
-
120
- def values_map(values)
121
- Hash[@keys.zip(values)]
122
- end
123
- end
124
-
125
- class TSVParser < ValuesParser
126
- config_param :delimiter, :string, :default => "\t"
127
-
128
- def call(text)
129
- return parse_time(values_map(text.split(@delimiter)))
130
- end
131
- end
132
-
133
- class CSVParser < ValuesParser
134
- def initialize
135
- super
136
- require 'csv'
137
- end
138
-
139
- def call(text)
140
- return parse_time(values_map(CSV.parse_line(text)))
141
- end
142
- end
143
-
144
- class ApacheParser < GenericParser
145
- include Fluent::Configurable
146
-
147
- REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
148
-
149
- def initialize
150
- super
151
-
152
- @time_key = "time"
153
- @time_format = "%d/%b/%Y:%H:%M:%S %z"
154
- end
155
-
156
- def call(text)
157
- m = REGEXP.match(text)
158
- unless m
159
- unless @suppress_parse_error_log
160
- @log.warn "pattern not match: #{text.inspect}"
161
- end
162
-
163
- return nil, nil
164
- end
165
-
166
- host = m['host']
167
- host = (host == '-') ? nil : host
168
-
169
- user = m['user']
170
- user = (user == '-') ? nil : user
171
-
172
- time = m['time']
173
-
174
- method = m['method']
175
- path = m['path']
176
-
177
- code = m['code'].to_i
178
- code = nil if code == 0
179
-
180
- size = m['size']
181
- size = (size == '-') ? nil : size.to_i
182
-
183
- referer = m['referer']
184
- referer = (referer == '-') ? nil : referer
185
-
186
- agent = m['agent']
187
- agent = (agent == '-') ? nil : agent
188
-
189
- record = {
190
- "time" => time,
191
- "host" => host,
192
- "user" => user,
193
- "method" => method,
194
- "path" => path,
195
- "code" => code,
196
- "size" => size,
197
- "referer" => referer,
198
- "agent" => agent,
199
- }
200
-
201
- parse_time(record)
202
- end
203
- end
204
-
205
- TEMPLATE_FACTORIES = {
206
- 'apache' => Proc.new { RegexpParser.new(/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
207
- 'apache2' => Proc.new { ApacheParser.new },
208
- 'nginx' => Proc.new { RegexpParser.new(/^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
209
- 'syslog' => Proc.new { RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}) },
210
- 'json' => Proc.new { JSONParser.new },
211
- 'csv' => Proc.new { CSVParser.new },
212
- 'tsv' => Proc.new { TSVParser.new },
213
- 'ltsv' => Proc.new { LabeledTSVParser.new },
214
- }
215
-
216
- def self.register_template(name, regexp_or_proc, time_format=nil)
217
-
218
- factory = if regexp_or_proc.is_a?(Regexp)
219
- regexp = regexp_or_proc
220
- Proc.new { RegexpParser.new(regexp, {'time_format'=>time_format}) }
221
- else
222
- Proc.new { proc }
223
- end
224
- TEMPLATE_FACTORIES[name] = factory
225
- end
226
-
227
- attr_accessor :log
228
- attr_reader :parser
229
-
230
- def initialize(logger)
231
- @log = logger
232
- @parser = nil
233
- end
234
-
235
- def configure(conf, required=true)
236
- format = conf['format']
237
-
238
- if format == nil
239
- if required
240
- raise Fluent::ConfigError, "'format' parameter is required"
241
- else
242
- return nil
243
- end
244
- end
245
-
246
- if format[0] == ?/ && format[format.length-1] == ?/
247
- # regexp
248
- begin
249
- regexp = Regexp.new(format[1..-2])
250
- if regexp.named_captures.empty?
251
- raise "No named captures"
252
- end
253
- rescue
254
- raise Fluent::ConfigError, "Invalid regexp '#{format[1..-2]}': #{$!}"
255
- end
256
- @parser = RegexpParser.new(regexp)
257
-
258
- else
259
- # built-in template
260
- factory = TEMPLATE_FACTORIES[format]
261
- unless factory
262
- raise Fluent::ConfigError, "Unknown format template '#{format}'"
263
- end
264
- @parser = factory.call
265
-
266
- end
267
-
268
- @parser.log = @log
269
-
270
- if @parser.respond_to?(:configure)
271
- @parser.configure(conf)
272
- end
273
-
274
- return true
275
- end
276
-
277
- def parse(text)
278
- return @parser.call(text)
279
- end
280
- end