fluent-plugin-parser 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -43,7 +43,7 @@ If you want original attribute-data pair in re-emitted message, specify 'reserve
43
43
  reserve_data yes
44
44
  </match>
45
45
 
46
- Format 'json' is also supported:
46
+ Formats 'json', 'csv' and 'tsv' are also supported:
47
47
 
48
48
  <match raw.sales.*>
49
49
  type parser
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |gem|
3
3
  gem.name = "fluent-plugin-parser"
4
- gem.version = "0.2.0"
4
+ gem.version = "0.2.1"
5
5
  gem.authors = ["TAGOMORI Satoshi"]
6
6
  gem.email = ["tagomoris@gmail.com"]
7
7
  gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
@@ -20,8 +20,7 @@ class FluentExt::TextParser
20
20
  def call(text)
21
21
  m = @regexp.match(text)
22
22
  unless m
23
- $log.debug "pattern not match: #{text}"
24
- # TODO?
23
+ $log.warn "pattern not match: #{text}"
25
24
  return nil, nil
26
25
  end
27
26
 
@@ -55,15 +54,13 @@ class FluentExt::TextParser
55
54
 
56
55
  def parse_time(record)
57
56
  time = nil
58
-
59
57
  if value = record.delete(@time_key)
60
- if @time_format
61
- time = Time.strptime(value, @time_format).to_i
62
- else
63
- time = value.to_i
64
- end
58
+ time = if @time_format
59
+ Time.strptime(value, @time_format).to_i
60
+ else
61
+ Time.parse(value).to_i
62
+ end
65
63
  end
66
-
67
64
  return time, record
68
65
  end
69
66
  end
@@ -73,6 +70,7 @@ class FluentExt::TextParser
73
70
  record = Yajl.load(text)
74
71
  return parse_time(record)
75
72
  rescue Yajl::ParseError
73
+ $log.warn "pattern not match(json): #{text.inspect}: #{$!}"
76
74
  return nil, nil
77
75
  end
78
76
  end
@@ -84,22 +82,109 @@ class FluentExt::TextParser
84
82
  end
85
83
  end
86
84
 
87
- TEMPLATES = {
88
- 'apache' => RegexpParser.new(/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}),
89
- 'syslog' => RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}),
90
- 'json' => JSONParser.new,
91
- 'ltsv' => LabeledTSVParser.new,
92
- }
85
+ class ValuesParser < GenericParser
86
+ config_param :keys, :string
93
87
 
94
- def self.register_template(name, regexp_or_proc, time_format=nil)
95
- if regexp_or_proc.is_a?(Regexp)
96
- pr = regexp_or_proc
97
- else
98
- regexp = regexp_or_proc
99
- pr = RegexpParser.new(regexp, {'time_format'=>time_format})
88
+ def configure(conf)
89
+ super
90
+ @keys = @keys.split(",")
91
+ end
92
+
93
+ def values_map(values)
94
+ Hash[@keys.zip(values)]
95
+ end
96
+ end
97
+
98
+ class TSVParser < ValuesParser
99
+ config_param :delimiter, :string, :default => "\t"
100
+
101
+ def call(text)
102
+ return parse_time(values_map(text.split(@delimiter)))
103
+ end
104
+ end
105
+
106
+ class CSVParser < ValuesParser
107
+ def initialize
108
+ super
109
+ require 'csv'
110
+ end
111
+
112
+ def call(text)
113
+ return parse_time(values_map(CSV.parse_line(text)))
114
+ end
115
+ end
116
+
117
+ class ApacheParser
118
+ include Fluent::Configurable
119
+
120
+ REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
121
+
122
+ def call(text)
123
+ m = REGEXP.match(text)
124
+ unless m
125
+ $log.warn "pattern not match: #{text.inspect}"
126
+ return nil, nil
127
+ end
128
+
129
+ host = m['host']
130
+ host = (host == '-') ? nil : host
131
+
132
+ user = m['user']
133
+ user = (user == '-') ? nil : user
134
+
135
+ time = m['time']
136
+ time = Time.strptime(time, "%d/%b/%Y:%H:%M:%S %z").to_i
137
+
138
+ method = m['method']
139
+ path = m['path']
140
+
141
+ code = m['code'].to_i
142
+ code = nil if code == 0
143
+
144
+ size = m['size']
145
+ size = (size == '-') ? nil : size.to_i
146
+
147
+ referer = m['referer']
148
+ referer = (referer == '-') ? nil : referer
149
+
150
+ agent = m['agent']
151
+ agent = (agent == '-') ? nil : agent
152
+
153
+ record = {
154
+ "host" => host,
155
+ "user" => user,
156
+ "method" => method,
157
+ "path" => path,
158
+ "code" => code,
159
+ "size" => size,
160
+ "referer" => referer,
161
+ "agent" => agent,
162
+ }
163
+
164
+ return time, record
100
165
  end
166
+ end
101
167
 
102
- TEMPLATES[name] = pr
168
+ TEMPLATE_FACTORIES = {
169
+ 'apache' => Proc.new { RegexpParser.new(/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
170
+ 'apache2' => Proc.new { ApacheParser.new },
171
+ 'nginx' => Proc.new { RegexpParser.new(/^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
172
+ 'syslog' => Proc.new { RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}) },
173
+ 'json' => Proc.new { JSONParser.new },
174
+ 'csv' => Proc.new { CSVParser.new },
175
+ 'tsv' => Proc.new { TSVParser.new },
176
+ 'ltsv' => Proc.new { LabeledTSVParser.new },
177
+ }
178
+
179
+ def self.register_template(name, regexp_or_proc, time_format=nil)
180
+
181
+ factory = if regexp_or_proc.is_a?(Regexp)
182
+ regexp = regexp_or_proc
183
+ Proc.new { RegexpParser.new(regexp, {'time_format'=>time_format}) }
184
+ else
185
+ Proc.new { proc }
186
+ end
187
+ TEMPLATE_FACTORIES[name] = factory
103
188
  end
104
189
 
105
190
  def initialize
@@ -127,15 +212,16 @@ class FluentExt::TextParser
127
212
  rescue
128
213
  raise Fluent::ConfigError, "Invalid regexp '#{format[1..-2]}': #{$!}"
129
214
  end
130
-
131
215
  @parser = RegexpParser.new(regexp)
132
216
 
133
217
  else
134
218
  # built-in template
135
- @parser = TEMPLATES[format]
136
- unless @parser
219
+ factory = TEMPLATE_FACTORIES[format]
220
+ unless factory
137
221
  raise ConfigError, "Unknown format template '#{format}'"
138
222
  end
223
+ @parser = factory.call
224
+
139
225
  end
140
226
 
141
227
  if @parser.respond_to?(:configure)
@@ -261,4 +261,58 @@ class ParserOutputTest < Test::Unit::TestCase
261
261
  assert_equal 'first', second[2]['xxx']
262
262
  assert_equal 'second2', second[2]['yyy']
263
263
  end
264
+
265
+ CONFIG_TSV = %[
266
+ remove_prefix foo.baz
267
+ add_prefix foo.bar
268
+ format tsv
269
+ key_name data
270
+ keys key1,key2,key3
271
+ ]
272
+ def test_emit_ltsv
273
+ d = create_driver(CONFIG_TSV, 'foo.baz.test')
274
+ time = Time.parse("2012-04-02 18:20:59").to_i
275
+ d.run do
276
+ d.emit({'data' => "value1\tvalue2\tvalueThree", 'xxx' => 'x', 'yyy' => 'y'}, time)
277
+ end
278
+ emits = d.emits
279
+ assert_equal 1, emits.length
280
+
281
+ first = emits[0]
282
+ assert_equal 'foo.bar.test', first[0]
283
+ assert_equal time, first[1]
284
+ assert_nil first[2]['data']
285
+ assert_equal 'value1', first[2]['key1']
286
+ assert_equal 'value2', first[2]['key2']
287
+ assert_equal 'valueThree', first[2]['key3']
288
+ end
289
+
290
+ CONFIG_CSV = %[
291
+ remove_prefix foo.baz
292
+ add_prefix foo.bar
293
+ format csv
294
+ key_name data
295
+ keys key1,key2,key3
296
+ ]
297
+ def test_emit_ltsv
298
+ d = create_driver(CONFIG_CSV, 'foo.baz.test')
299
+ time = Time.parse("2012-04-02 18:20:59").to_i
300
+ d.run do
301
+ d.emit({'data' => 'value1,"value2","value""ThreeYes!"', 'xxx' => 'x', 'yyy' => 'y'}, time)
302
+ end
303
+ emits = d.emits
304
+ assert_equal 1, emits.length
305
+
306
+ first = emits[0]
307
+ assert_equal 'foo.bar.test', first[0]
308
+ assert_equal time, first[1]
309
+ assert_nil first[2]['data']
310
+ assert_equal 'value1', first[2]['key1']
311
+ assert_equal 'value2', first[2]['key2']
312
+ assert_equal 'value"ThreeYes!', first[2]['key3']
313
+ end
314
+
315
+ #TODO: apache2
316
+ # REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
317
+
264
318
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-06 00:00:00.000000000 Z
12
+ date: 2013-02-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake