fluent-plugin-parser 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +1 -1
- data/fluent-plugin-parser.gemspec +1 -1
- data/lib/fluent/plugin/fixed_parser.rb +111 -25
- data/test/plugin/test_out_parser.rb +54 -0
- metadata +2 -2
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |gem|
|
3
3
|
gem.name = "fluent-plugin-parser"
|
4
|
-
gem.version = "0.2.
|
4
|
+
gem.version = "0.2.1"
|
5
5
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
6
|
gem.email = ["tagomoris@gmail.com"]
|
7
7
|
gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
|
@@ -20,8 +20,7 @@ class FluentExt::TextParser
|
|
20
20
|
def call(text)
|
21
21
|
m = @regexp.match(text)
|
22
22
|
unless m
|
23
|
-
$log.
|
24
|
-
# TODO?
|
23
|
+
$log.warn "pattern not match: #{text}"
|
25
24
|
return nil, nil
|
26
25
|
end
|
27
26
|
|
@@ -55,15 +54,13 @@ class FluentExt::TextParser
|
|
55
54
|
|
56
55
|
def parse_time(record)
|
57
56
|
time = nil
|
58
|
-
|
59
57
|
if value = record.delete(@time_key)
|
60
|
-
if @time_format
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
58
|
+
time = if @time_format
|
59
|
+
Time.strptime(value, @time_format).to_i
|
60
|
+
else
|
61
|
+
Time.parse(value).to_i
|
62
|
+
end
|
65
63
|
end
|
66
|
-
|
67
64
|
return time, record
|
68
65
|
end
|
69
66
|
end
|
@@ -73,6 +70,7 @@ class FluentExt::TextParser
|
|
73
70
|
record = Yajl.load(text)
|
74
71
|
return parse_time(record)
|
75
72
|
rescue Yajl::ParseError
|
73
|
+
$log.warn "pattern not match(json): #{text.inspect}: #{$!}"
|
76
74
|
return nil, nil
|
77
75
|
end
|
78
76
|
end
|
@@ -84,22 +82,109 @@ class FluentExt::TextParser
|
|
84
82
|
end
|
85
83
|
end
|
86
84
|
|
87
|
-
|
88
|
-
|
89
|
-
'syslog' => RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}),
|
90
|
-
'json' => JSONParser.new,
|
91
|
-
'ltsv' => LabeledTSVParser.new,
|
92
|
-
}
|
85
|
+
# Base class for parsers that receive a line as an ordered list of values
# and label those values with a configured list of key names.
class ValuesParser < GenericParser
  # Comma-separated key names; turned into an Array by #configure.
  config_param :keys, :string

  # Delegates to the superclass first, then replaces the raw `keys` string
  # with the Array obtained by splitting it on ",".
  def configure(conf)
    super
    key_names = @keys.split(",")
    @keys = key_names
  end

  # Builds a Hash that pairs each configured key with the value at the same
  # position. Per Array#zip, keys without a corresponding value map to nil
  # and values beyond the number of keys are ignored.
  def values_map(values)
    pairs = @keys.zip(values)
    Hash[pairs]
  end
end
|
97
|
+
|
98
|
+
# Parser for delimiter-separated lines; the delimiter defaults to a tab.
class TSVParser < ValuesParser
  config_param :delimiter, :string, :default => "\t"

  # Splits +text+ on the configured delimiter, labels the fields through
  # values_map and returns whatever parse_time returns for that record.
  def call(text)
    fields = text.split(@delimiter)
    record = values_map(fields)
    parse_time(record)
  end
end
|
105
|
+
|
106
|
+
# Parser for a single CSV-formatted line, using Ruby's csv library.
class CSVParser < ValuesParser
  # Runs the superclass initializer, then loads the csv library. The require
  # is kept here (not at file level) so csv is only loaded once a CSVParser
  # is actually instantiated.
  def initialize
    super
    require 'csv'
  end

  # Parses +text+ as one CSV row, labels the columns through values_map and
  # returns whatever parse_time returns for that record.
  def call(text)
    columns = CSV.parse_line(text)
    record = values_map(columns)
    parse_time(record)
  end
end
|
116
|
+
|
117
|
+
# Parser for Apache-style access log lines that converts fields to native
# values: "-" placeholders become nil, code and size become Integers.
class ApacheParser
  include Fluent::Configurable

  # Named captures: host, user, time, method, path, code, size and the
  # optional trailing pair referer / agent.
  REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/

  # Parses one access log line.
  #
  # Returns [time, record] where time is an Integer epoch obtained from the
  # bracketed timestamp and record is a Hash of the remaining fields, or
  # [nil, nil] (after logging a warning) when the line does not match REGEXP.
  def call(text)
    matched = REGEXP.match(text)
    if matched.nil?
      $log.warn "pattern not match: #{text.inspect}"
      return nil, nil
    end

    # A literal "-" marks an absent field; anything else (including nil from
    # an unmatched optional group) is passed through untouched.
    dash_to_nil = lambda { |field| field == '-' ? nil : field }

    timestamp = Time.strptime(matched['time'], "%d/%b/%Y:%H:%M:%S %z").to_i

    # String#to_i yields 0 for a non-numeric code; 0 is reported as nil.
    status = matched['code'].to_i
    status = nil if status == 0

    bytes = matched['size']
    bytes = bytes == '-' ? nil : bytes.to_i

    record = {
      "host" => dash_to_nil.call(matched['host']),
      "user" => dash_to_nil.call(matched['user']),
      "method" => matched['method'],
      "path" => matched['path'],
      "code" => status,
      "size" => bytes,
      "referer" => dash_to_nil.call(matched['referer']),
      "agent" => dash_to_nil.call(matched['agent']),
    }

    return timestamp, record
  end
end
|
101
167
|
|
102
|
-
|
168
|
+
# Built-in format templates: template name => Proc that takes no arguments
# and builds a new parser object each time it is called. The Hash is left
# mutable because register_template adds entries to it.
TEMPLATE_FACTORIES = {
  'apache' => proc {
    RegexpParser.new(
      /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/,
      {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}
    )
  },
  'apache2' => proc { ApacheParser.new },
  'nginx' => proc {
    RegexpParser.new(
      /^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/,
      {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}
    )
  },
  'syslog' => proc {
    RegexpParser.new(
      /^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/,
      {'time_format'=>"%b %d %H:%M:%S"}
    )
  },
  'json' => proc { JSONParser.new },
  'csv' => proc { CSVParser.new },
  'tsv' => proc { TSVParser.new },
  'ltsv' => proc { LabeledTSVParser.new },
}
|
178
|
+
|
179
|
+
# Registers (or replaces) a named format template in TEMPLATE_FACTORIES.
#
# name           - template name used as the Hash key.
# regexp_or_proc - when a Regexp, each factory call builds a RegexpParser
#                  from it and time_format; any other object (typically a
#                  callable) is returned unchanged by the factory.
# time_format    - optional value passed to RegexpParser under the
#                  'time_format' key; only used for Regexp templates.
#
# Returns the factory Proc that was stored.
def self.register_template(name, regexp_or_proc, time_format=nil)
  factory = if regexp_or_proc.is_a?(Regexp)
              Proc.new { RegexpParser.new(regexp_or_proc, {'time_format'=>time_format}) }
            else
              # Hand back the caller-supplied object itself. A bare `proc`
              # here would invoke Kernel#proc without a block rather than
              # refer to the argument.
              Proc.new { regexp_or_proc }
            end
  TEMPLATE_FACTORIES[name] = factory
end
|
104
189
|
|
105
190
|
def initialize
|
@@ -127,15 +212,16 @@ class FluentExt::TextParser
|
|
127
212
|
rescue
|
128
213
|
raise Fluent::ConfigError, "Invalid regexp '#{format[1..-2]}': #{$!}"
|
129
214
|
end
|
130
|
-
|
131
215
|
@parser = RegexpParser.new(regexp)
|
132
216
|
|
133
217
|
else
|
134
218
|
# built-in template
|
135
|
-
|
136
|
-
unless
|
219
|
+
factory = TEMPLATE_FACTORIES[format]
|
220
|
+
unless factory
|
137
221
|
raise ConfigError, "Unknown format template '#{format}'"
|
138
222
|
end
|
223
|
+
@parser = factory.call
|
224
|
+
|
139
225
|
end
|
140
226
|
|
141
227
|
if @parser.respond_to?(:configure)
|
@@ -261,4 +261,58 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
261
261
|
assert_equal 'first', second[2]['xxx']
|
262
262
|
assert_equal 'second2', second[2]['yyy']
|
263
263
|
end
|
264
|
+
|
265
|
+
CONFIG_TSV = %[
|
266
|
+
remove_prefix foo.baz
|
267
|
+
add_prefix foo.bar
|
268
|
+
format tsv
|
269
|
+
key_name data
|
270
|
+
keys key1,key2,key3
|
271
|
+
]
|
272
|
+
# Emits one record whose 'data' field is tab-separated and checks, using the
# CONFIG_TSV settings, that the tag prefix is rewritten, the time is kept,
# 'data' is absent from the output and key1..key3 carry the split values.
def test_emit_tsv
  d = create_driver(CONFIG_TSV, 'foo.baz.test')
  time = Time.parse("2012-04-02 18:20:59").to_i
  d.run do
    d.emit({'data' => "value1\tvalue2\tvalueThree", 'xxx' => 'x', 'yyy' => 'y'}, time)
  end
  emits = d.emits
  assert_equal 1, emits.length

  first = emits[0]
  assert_equal 'foo.bar.test', first[0]
  assert_equal time, first[1]
  assert_nil first[2]['data']
  assert_equal 'value1', first[2]['key1']
  assert_equal 'value2', first[2]['key2']
  assert_equal 'valueThree', first[2]['key3']
end
|
289
|
+
|
290
|
+
CONFIG_CSV = %[
|
291
|
+
remove_prefix foo.baz
|
292
|
+
add_prefix foo.bar
|
293
|
+
format csv
|
294
|
+
key_name data
|
295
|
+
keys key1,key2,key3
|
296
|
+
]
|
297
|
+
# Emits one record whose 'data' field is a CSV row (including quoted fields
# and an escaped double quote) and checks, using the CONFIG_CSV settings,
# that the tag prefix is rewritten, the time is kept, 'data' is absent from
# the output and key1..key3 carry the unquoted column values.
def test_emit_csv
  d = create_driver(CONFIG_CSV, 'foo.baz.test')
  time = Time.parse("2012-04-02 18:20:59").to_i
  d.run do
    d.emit({'data' => 'value1,"value2","value""ThreeYes!"', 'xxx' => 'x', 'yyy' => 'y'}, time)
  end
  emits = d.emits
  assert_equal 1, emits.length

  first = emits[0]
  assert_equal 'foo.bar.test', first[0]
  assert_equal time, first[1]
  assert_nil first[2]['data']
  assert_equal 'value1', first[2]['key1']
  assert_equal 'value2', first[2]['key2']
  assert_equal 'value"ThreeYes!', first[2]['key3']
end
|
314
|
+
|
315
|
+
#TODO: apache2
|
316
|
+
# REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
|
317
|
+
|
264
318
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|