fluent-plugin-parser 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +1 -1
- data/fluent-plugin-parser.gemspec +1 -1
- data/lib/fluent/plugin/fixed_parser.rb +111 -25
- data/test/plugin/test_out_parser.rb +54 -0
- metadata +2 -2
data/fluent-plugin-parser.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
2
|
Gem::Specification.new do |gem|
|
3
3
|
gem.name = "fluent-plugin-parser"
|
4
|
-
gem.version = "0.2.0"
|
4
|
+
gem.version = "0.2.1"
|
5
5
|
gem.authors = ["TAGOMORI Satoshi"]
|
6
6
|
gem.email = ["tagomoris@gmail.com"]
|
7
7
|
gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
|
@@ -20,8 +20,7 @@ class FluentExt::TextParser
|
|
20
20
|
def call(text)
|
21
21
|
m = @regexp.match(text)
|
22
22
|
unless m
|
23
|
-
$log.
|
24
|
-
# TODO?
|
23
|
+
$log.warn "pattern not match: #{text}"
|
25
24
|
return nil, nil
|
26
25
|
end
|
27
26
|
|
@@ -55,15 +54,13 @@ class FluentExt::TextParser
|
|
55
54
|
|
56
55
|
def parse_time(record)
|
57
56
|
time = nil
|
58
|
-
|
59
57
|
if value = record.delete(@time_key)
|
60
|
-
if @time_format
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
58
|
+
time = if @time_format
|
59
|
+
Time.strptime(value, @time_format).to_i
|
60
|
+
else
|
61
|
+
Time.parse(value).to_i
|
62
|
+
end
|
65
63
|
end
|
66
|
-
|
67
64
|
return time, record
|
68
65
|
end
|
69
66
|
end
|
@@ -73,6 +70,7 @@ class FluentExt::TextParser
|
|
73
70
|
record = Yajl.load(text)
|
74
71
|
return parse_time(record)
|
75
72
|
rescue Yajl::ParseError
|
73
|
+
$log.warn "pattern not match(json): #{text.inspect}: #{$!}"
|
76
74
|
return nil, nil
|
77
75
|
end
|
78
76
|
end
|
@@ -84,22 +82,109 @@ class FluentExt::TextParser
|
|
84
82
|
end
|
85
83
|
end
|
86
84
|
|
87
|
-
|
88
|
-
|
89
|
-
'syslog' => RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}),
|
90
|
-
'json' => JSONParser.new,
|
91
|
-
'ltsv' => LabeledTSVParser.new,
|
92
|
-
}
|
85
|
+
class ValuesParser < GenericParser
|
86
|
+
config_param :keys, :string
|
93
87
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
88
|
+
def configure(conf)
|
89
|
+
super
|
90
|
+
@keys = @keys.split(",")
|
91
|
+
end
|
92
|
+
|
93
|
+
def values_map(values)
|
94
|
+
Hash[@keys.zip(values)]
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
class TSVParser < ValuesParser
|
99
|
+
config_param :delimiter, :string, :default => "\t"
|
100
|
+
|
101
|
+
def call(text)
|
102
|
+
return parse_time(values_map(text.split(@delimiter)))
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
class CSVParser < ValuesParser
|
107
|
+
def initialize
|
108
|
+
super
|
109
|
+
require 'csv'
|
110
|
+
end
|
111
|
+
|
112
|
+
def call(text)
|
113
|
+
return parse_time(values_map(CSV.parse_line(text)))
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
class ApacheParser
|
118
|
+
include Fluent::Configurable
|
119
|
+
|
120
|
+
REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
|
121
|
+
|
122
|
+
def call(text)
|
123
|
+
m = REGEXP.match(text)
|
124
|
+
unless m
|
125
|
+
$log.warn "pattern not match: #{text.inspect}"
|
126
|
+
return nil, nil
|
127
|
+
end
|
128
|
+
|
129
|
+
host = m['host']
|
130
|
+
host = (host == '-') ? nil : host
|
131
|
+
|
132
|
+
user = m['user']
|
133
|
+
user = (user == '-') ? nil : user
|
134
|
+
|
135
|
+
time = m['time']
|
136
|
+
time = Time.strptime(time, "%d/%b/%Y:%H:%M:%S %z").to_i
|
137
|
+
|
138
|
+
method = m['method']
|
139
|
+
path = m['path']
|
140
|
+
|
141
|
+
code = m['code'].to_i
|
142
|
+
code = nil if code == 0
|
143
|
+
|
144
|
+
size = m['size']
|
145
|
+
size = (size == '-') ? nil : size.to_i
|
146
|
+
|
147
|
+
referer = m['referer']
|
148
|
+
referer = (referer == '-') ? nil : referer
|
149
|
+
|
150
|
+
agent = m['agent']
|
151
|
+
agent = (agent == '-') ? nil : agent
|
152
|
+
|
153
|
+
record = {
|
154
|
+
"host" => host,
|
155
|
+
"user" => user,
|
156
|
+
"method" => method,
|
157
|
+
"path" => path,
|
158
|
+
"code" => code,
|
159
|
+
"size" => size,
|
160
|
+
"referer" => referer,
|
161
|
+
"agent" => agent,
|
162
|
+
}
|
163
|
+
|
164
|
+
return time, record
|
100
165
|
end
|
166
|
+
end
|
101
167
|
|
102
|
-
|
168
|
+
TEMPLATE_FACTORIES = {
|
169
|
+
'apache' => Proc.new { RegexpParser.new(/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
|
170
|
+
'apache2' => Proc.new { ApacheParser.new },
|
171
|
+
'nginx' => Proc.new { RegexpParser.new(/^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}) },
|
172
|
+
'syslog' => Proc.new { RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}) },
|
173
|
+
'json' => Proc.new { JSONParser.new },
|
174
|
+
'csv' => Proc.new { CSVParser.new },
|
175
|
+
'tsv' => Proc.new { TSVParser.new },
|
176
|
+
'ltsv' => Proc.new { LabeledTSVParser.new },
|
177
|
+
}
|
178
|
+
|
179
|
+
def self.register_template(name, regexp_or_proc, time_format=nil)
|
180
|
+
|
181
|
+
factory = if regexp_or_proc.is_a?(Regexp)
|
182
|
+
regexp = regexp_or_proc
|
183
|
+
Proc.new { RegexpParser.new(regexp, {'time_format'=>time_format}) }
|
184
|
+
else
|
185
|
+
Proc.new { proc }
|
186
|
+
end
|
187
|
+
TEMPLATE_FACTORIES[name] = factory
|
103
188
|
end
|
104
189
|
|
105
190
|
def initialize
|
@@ -127,15 +212,16 @@ class FluentExt::TextParser
|
|
127
212
|
rescue
|
128
213
|
raise Fluent::ConfigError, "Invalid regexp '#{format[1..-2]}': #{$!}"
|
129
214
|
end
|
130
|
-
|
131
215
|
@parser = RegexpParser.new(regexp)
|
132
216
|
|
133
217
|
else
|
134
218
|
# built-in template
|
135
|
-
|
136
|
-
unless
|
219
|
+
factory = TEMPLATE_FACTORIES[format]
|
220
|
+
unless factory
|
137
221
|
raise ConfigError, "Unknown format template '#{format}'"
|
138
222
|
end
|
223
|
+
@parser = factory.call
|
224
|
+
|
139
225
|
end
|
140
226
|
|
141
227
|
if @parser.respond_to?(:configure)
|
@@ -261,4 +261,58 @@ class ParserOutputTest < Test::Unit::TestCase
|
|
261
261
|
assert_equal 'first', second[2]['xxx']
|
262
262
|
assert_equal 'second2', second[2]['yyy']
|
263
263
|
end
|
264
|
+
|
265
|
+
CONFIG_TSV = %[
|
266
|
+
remove_prefix foo.baz
|
267
|
+
add_prefix foo.bar
|
268
|
+
format tsv
|
269
|
+
key_name data
|
270
|
+
keys key1,key2,key3
|
271
|
+
]
|
272
|
+
def test_emit_ltsv
|
273
|
+
d = create_driver(CONFIG_TSV, 'foo.baz.test')
|
274
|
+
time = Time.parse("2012-04-02 18:20:59").to_i
|
275
|
+
d.run do
|
276
|
+
d.emit({'data' => "value1\tvalue2\tvalueThree", 'xxx' => 'x', 'yyy' => 'y'}, time)
|
277
|
+
end
|
278
|
+
emits = d.emits
|
279
|
+
assert_equal 1, emits.length
|
280
|
+
|
281
|
+
first = emits[0]
|
282
|
+
assert_equal 'foo.bar.test', first[0]
|
283
|
+
assert_equal time, first[1]
|
284
|
+
assert_nil first[2]['data']
|
285
|
+
assert_equal 'value1', first[2]['key1']
|
286
|
+
assert_equal 'value2', first[2]['key2']
|
287
|
+
assert_equal 'valueThree', first[2]['key3']
|
288
|
+
end
|
289
|
+
|
290
|
+
CONFIG_CSV = %[
|
291
|
+
remove_prefix foo.baz
|
292
|
+
add_prefix foo.bar
|
293
|
+
format csv
|
294
|
+
key_name data
|
295
|
+
keys key1,key2,key3
|
296
|
+
]
|
297
|
+
def test_emit_ltsv
|
298
|
+
d = create_driver(CONFIG_CSV, 'foo.baz.test')
|
299
|
+
time = Time.parse("2012-04-02 18:20:59").to_i
|
300
|
+
d.run do
|
301
|
+
d.emit({'data' => 'value1,"value2","value""ThreeYes!"', 'xxx' => 'x', 'yyy' => 'y'}, time)
|
302
|
+
end
|
303
|
+
emits = d.emits
|
304
|
+
assert_equal 1, emits.length
|
305
|
+
|
306
|
+
first = emits[0]
|
307
|
+
assert_equal 'foo.bar.test', first[0]
|
308
|
+
assert_equal time, first[1]
|
309
|
+
assert_nil first[2]['data']
|
310
|
+
assert_equal 'value1', first[2]['key1']
|
311
|
+
assert_equal 'value2', first[2]['key2']
|
312
|
+
assert_equal 'value"ThreeYes!', first[2]['key3']
|
313
|
+
end
|
314
|
+
|
315
|
+
#TODO: apache2
|
316
|
+
# REGEXP = /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/
|
317
|
+
|
264
318
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|