fluent-plugin-parser 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -43,7 +43,7 @@ If you want original attribute-data pair in re-emitted message, specify 'reserve
43
43
  reserve_data yes
44
44
  </match>
45
45
 
46
- Format 'json' is also supported:
46
+ Format 'json', 'csv' and 'tsv' are also supported:
47
47
 
48
48
  <match raw.sales.*>
49
49
  type parser
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |gem|
3
3
  gem.name = "fluent-plugin-parser"
4
- gem.version = "0.2.0"
4
+ gem.version = "0.2.1"
5
5
  gem.authors = ["TAGOMORI Satoshi"]
6
6
  gem.email = ["tagomoris@gmail.com"]
7
7
  gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
@@ -20,8 +20,7 @@ class FluentExt::TextParser
20
20
  def call(text)
21
21
  m = @regexp.match(text)
22
22
  unless m
23
- $log.debug "pattern not match: #{text}"
24
- # TODO?
23
+ $log.warn "pattern not match: #{text}"
25
24
  return nil, nil
26
25
  end
27
26
 
@@ -55,15 +54,13 @@ class FluentExt::TextParser
55
54
 
56
55
  def parse_time(record)
57
56
  time = nil
58
-
59
57
  if value = record.delete(@time_key)
60
- if @time_format
61
- time = Time.strptime(value, @time_format).to_i
62
- else
63
- time = value.to_i
64
- end
58
+ time = if @time_format
59
+ Time.strptime(value, @time_format).to_i
60
+ else
61
+ Time.parse(value).to_i
62
+ end
65
63
  end
66
-
67
64
  return time, record
68
65
  end
69
66
  end
@@ -73,6 +70,7 @@ class FluentExt::TextParser
73
70
  record = Yajl.load(text)
74
71
  return parse_time(record)
75
72
  rescue Yajl::ParseError
73
+ $log.warn "pattern not match(json): #{text.inspect}: #{$!}"
76
74
  return nil, nil
77
75
  end
78
76
  end
@@ -84,22 +82,109 @@ class FluentExt::TextParser
84
82
  end
85
83
  end
86
84
 
87
- TEMPLATES = {
88
- 'apache' => RegexpParser.new(/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}),
89
- 'syslog' => RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}),
90
- 'json' => JSONParser.new,
91
- 'ltsv' => LabeledTSVParser.new,
92
- }
85
+ class ValuesParser < GenericParser
86
+ config_param :keys, :string
93
87
 
94
- def self.register_template(name, regexp_or_proc, time_format=nil)
95
- if regexp_or_proc.is_a?(Regexp)
96
- pr = regexp_or_proc
97
- else
98
- regexp = regexp_or_proc
99
- pr = RegexpParser.new(regexp, {'time_format'=>time_format})
88
+ def configure(conf)
89
+ super
90
+ @keys = @keys.split(",")
91
+ end
92
+
93
+ def values_map(values)
94
+ Hash[@keys.zip(values)]
95
+ end
96
+ end
97
+
98
+ class TSVParser < ValuesParser
99
+ config_param :delimiter, :string, :default => "\t"
100
+
101
+ def call(text)
102
+ return parse_time(values_map(text.split(@delimiter)))
103
+ end
104
+ end
105
+
106
+ class CSVParser < ValuesParser
107
+ def initialize
108
+ super
109
+ require 'csv'
110
+ end
111
+
112
+ def call(text)
113
+ return parse_time(values_map(CSV.parse_line(text)))
114
+ end
115
+ end
116
+
117
+ class ApacheParser
118
+ include Fluent::Configurable
119
+
120
+ REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
121
+
122
+ def call(text)
123
+ m = REGEXP.match(text)
124
+ unless m
125
+ $log.warn "pattern not match: #{text.inspect}"
126
+ return nil, nil
127
+ end
128
+
129
+ host = m['host']
130
+ host = (host == '-') ? nil : host
131
+
132
+ user = m['user']
133
+ user = (user == '-') ? nil : user
134
+
135
+ time = m['time']
136
+ time = Time.strptime(time, "%d/%b/%Y:%H:%M:%S %z").to_i
137
+
138
+ method = m['method']
139
+ path = m['path']
140
+
141
+ code = m['code'].to_i
142
+ code = nil if code == 0
143
+
144
+ size = m['size']
145
+ size = (size == '-') ? nil : size.to_i
146
+
147
+ referer = m['referer']
148
+ referer = (referer == '-') ? nil : referer
149
+
150
+ agent = m['agent']
151
+ agent = (agent == '-') ? nil : agent
152
+
153
+ record = {
154
+ "host" => host,
155
+ "user" => user,
156
+ "method" => method,
157
+ "path" => path,
158
+ "code" => code,
159
+ "size" => size,
160
+ "referer" => referer,
161
+ "agent" => agent,
162
+ }
163
+
164
+ return time, record
100
165
  end
166
+ end
101
167
 
102
- TEMPLATES[name] = pr
168
+ TEMPLATE_FACTORIES = {
169
+ 'apache' => Proc.new { RegexpParser.new(/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
170
+ 'apache2' => Proc.new { ApacheParser.new },
171
+ 'nginx' => Proc.new { RegexpParser.new(/^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
172
+ 'syslog' => Proc.new { RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}) },
173
+ 'json' => Proc.new { JSONParser.new },
174
+ 'csv' => Proc.new { CSVParser.new },
175
+ 'tsv' => Proc.new { TSVParser.new },
176
+ 'ltsv' => Proc.new { LabeledTSVParser.new },
177
+ }
178
+
179
+ def self.register_template(name, regexp_or_proc, time_format=nil)
180
+
181
+ factory = if regexp_or_proc.is_a?(Regexp)
182
+ regexp = regexp_or_proc
183
+ Proc.new { RegexpParser.new(regexp, {'time_format'=>time_format}) }
184
+ else
185
+ Proc.new { proc }
186
+ end
187
+ TEMPLATE_FACTORIES[name] = factory
103
188
  end
104
189
 
105
190
  def initialize
@@ -127,15 +212,16 @@ class FluentExt::TextParser
127
212
  rescue
128
213
  raise Fluent::ConfigError, "Invalid regexp '#{format[1..-2]}': #{$!}"
129
214
  end
130
-
131
215
  @parser = RegexpParser.new(regexp)
132
216
 
133
217
  else
134
218
  # built-in template
135
- @parser = TEMPLATES[format]
136
- unless @parser
219
+ factory = TEMPLATE_FACTORIES[format]
220
+ unless factory
137
221
  raise ConfigError, "Unknown format template '#{format}'"
138
222
  end
223
+ @parser = factory.call
224
+
139
225
  end
140
226
 
141
227
  if @parser.respond_to?(:configure)
@@ -261,4 +261,58 @@ class ParserOutputTest < Test::Unit::TestCase
261
261
  assert_equal 'first', second[2]['xxx']
262
262
  assert_equal 'second2', second[2]['yyy']
263
263
  end
264
+
265
+ CONFIG_TSV = %[
266
+ remove_prefix foo.baz
267
+ add_prefix foo.bar
268
+ format tsv
269
+ key_name data
270
+ keys key1,key2,key3
271
+ ]
272
+ def test_emit_ltsv
273
+ d = create_driver(CONFIG_TSV, 'foo.baz.test')
274
+ time = Time.parse("2012-04-02 18:20:59").to_i
275
+ d.run do
276
+ d.emit({'data' => "value1\tvalue2\tvalueThree", 'xxx' => 'x', 'yyy' => 'y'}, time)
277
+ end
278
+ emits = d.emits
279
+ assert_equal 1, emits.length
280
+
281
+ first = emits[0]
282
+ assert_equal 'foo.bar.test', first[0]
283
+ assert_equal time, first[1]
284
+ assert_nil first[2]['data']
285
+ assert_equal 'value1', first[2]['key1']
286
+ assert_equal 'value2', first[2]['key2']
287
+ assert_equal 'valueThree', first[2]['key3']
288
+ end
289
+
290
+ CONFIG_CSV = %[
291
+ remove_prefix foo.baz
292
+ add_prefix foo.bar
293
+ format csv
294
+ key_name data
295
+ keys key1,key2,key3
296
+ ]
297
+ def test_emit_ltsv
298
+ d = create_driver(CONFIG_CSV, 'foo.baz.test')
299
+ time = Time.parse("2012-04-02 18:20:59").to_i
300
+ d.run do
301
+ d.emit({'data' => 'value1,"value2","value""ThreeYes!"', 'xxx' => 'x', 'yyy' => 'y'}, time)
302
+ end
303
+ emits = d.emits
304
+ assert_equal 1, emits.length
305
+
306
+ first = emits[0]
307
+ assert_equal 'foo.bar.test', first[0]
308
+ assert_equal time, first[1]
309
+ assert_nil first[2]['data']
310
+ assert_equal 'value1', first[2]['key1']
311
+ assert_equal 'value2', first[2]['key2']
312
+ assert_equal 'value"ThreeYes!', first[2]['key3']
313
+ end
314
+
315
+ #TODO: apache2
316
+ # REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
317
+
264
318
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-06 00:00:00.000000000 Z
12
+ date: 2013-02-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake