fluent-plugin-parser 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -52,6 +52,19 @@ Format 'json' is also supported:
52
52
  key_name sales
53
53
  </match>
54
54
 
55
+ Format 'ltsv' (Labeled TSV, i.e. labeled tab-separated values) is also supported:
56
+
57
+ <match raw.sales.*>
58
+ type parser
59
+ tag sales
60
+ format ltsv
61
+ key_name sales
62
+ </match>
63
+
64
+ 'LTSV' is a format like the one below; unlike JSON, it is easy to write with a simple formatter (e.g. Apache's LogFormat):
65
+
66
+ KEY1:VALUE1 [TAB] KEY2:VALUE2 [TAB] ...
67
+
55
68
  ### DeparserOutput
56
69
 
57
70
  To build CSV from field 'store','item','num', as field 'csv', without raw data:
@@ -1,7 +1,7 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
  Gem::Specification.new do |gem|
3
3
  gem.name = "fluent-plugin-parser"
4
- gem.version = "0.1.2"
4
+ gem.version = "0.2.0"
5
5
  gem.authors = ["TAGOMORI Satoshi"]
6
6
  gem.email = ["tagomoris@gmail.com"]
7
7
  gem.description = %q{fluentd plugin to parse single field, or to combine log structure into single field}
@@ -47,15 +47,13 @@ class FluentExt::TextParser
47
47
  end
48
48
  end
49
49
 
50
- class JSONParser
50
+ class GenericParser
51
51
  include Fluent::Configurable
52
52
 
53
53
  config_param :time_key, :string, :default => 'time'
54
54
  config_param :time_format, :string, :default => nil
55
55
 
56
- def call(text)
57
- record = Yajl.load(text)
58
-
56
+ def parse_time(record)
59
57
  time = nil
60
58
 
61
59
  if value = record.delete(@time_key)
@@ -67,15 +65,30 @@ class FluentExt::TextParser
67
65
  end
68
66
 
69
67
  return time, record
68
+ end
69
+ end
70
+
71
+ class JSONParser < GenericParser
72
+ def call(text)
73
+ record = Yajl.load(text)
74
+ return parse_time(record)
70
75
  rescue Yajl::ParseError
71
76
  return nil, nil
72
77
  end
73
78
  end
74
79
 
80
+ class LabeledTSVParser < GenericParser
81
+ def call(text)
82
+ record = Hash[text.split("\t").map{|p| p.split(":", 2)}]
83
+ parse_time(record)
84
+ end
85
+ end
86
+
75
87
  TEMPLATES = {
76
88
  'apache' => RegexpParser.new(/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/, {'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}),
77
89
  'syslog' => RegexpParser.new(/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?[^\:]*\: *(?<message>.*)$/, {'time_format'=>"%b %d %H:%M:%S"}),
78
90
  'json' => JSONParser.new,
91
+ 'ltsv' => LabeledTSVParser.new,
79
92
  }
80
93
 
81
94
  def self.register_template(name, regexp_or_proc, time_format=nil)
@@ -51,6 +51,22 @@ class ParserOutputTest < Test::Unit::TestCase
51
51
  key_name foo
52
52
  ]
53
53
  }
54
+ assert_nothing_raised {
55
+ d = create_driver %[
56
+ remove_prefix foo.baz
57
+ add_prefix foo.bar
58
+ format json
59
+ key_name foo
60
+ ]
61
+ }
62
+ assert_nothing_raised {
63
+ d = create_driver %[
64
+ remove_prefix foo.baz
65
+ add_prefix foo.bar
66
+ format ltsv
67
+ key_name foo
68
+ ]
69
+ }
54
70
  d = create_driver %[
55
71
  tag foo.bar
56
72
  key_name foo
@@ -189,4 +205,60 @@ class ParserOutputTest < Test::Unit::TestCase
189
205
  assert_equal 'x', second[2]['xxx']
190
206
  assert_equal 'y', second[2]['yyy']
191
207
  end
208
+
209
+ CONFIG_LTSV = %[
210
+ remove_prefix foo.baz
211
+ add_prefix foo.bar
212
+ format ltsv
213
+ key_name data
214
+ ]
215
+ def test_emit_ltsv
216
+ d = create_driver(CONFIG_LTSV, 'foo.baz.test')
217
+ time = Time.parse("2012-04-02 18:20:59").to_i
218
+ d.run do
219
+ d.emit({'data' => "xxx:first\tyyy:second", 'xxx' => 'x', 'yyy' => 'y'}, time)
220
+ d.emit({'data' => "xxx:first\tyyy:second2", 'xxx' => 'x', 'yyy' => 'y'}, time)
221
+ end
222
+ emits = d.emits
223
+ assert_equal 2, emits.length
224
+
225
+ first = emits[0]
226
+ assert_equal 'foo.bar.test', first[0]
227
+ assert_equal time, first[1]
228
+ assert_nil first[2]['data']
229
+ assert_equal 'first', first[2]['xxx']
230
+ assert_equal 'second', first[2]['yyy']
231
+
232
+ second = emits[1]
233
+ assert_equal 'foo.bar.test', second[0]
234
+ assert_equal time, second[1]
235
+ assert_nil first[2]['data']
236
+ assert_equal 'first', second[2]['xxx']
237
+ assert_equal 'second2', second[2]['yyy']
238
+
239
+ d = create_driver(CONFIG_LTSV + %[
240
+ reserve_data yes
241
+ ], 'foo.baz.test')
242
+ time = Time.parse("2012-04-02 18:20:59").to_i
243
+ d.run do
244
+ d.emit({'data' => "xxx:first\tyyy:second", 'xxx' => 'x', 'yyy' => 'y'}, time)
245
+ d.emit({'data' => "xxx:first\tyyy:second2", 'xxx' => 'x', 'yyy' => 'y'}, time)
246
+ end
247
+ emits = d.emits
248
+ assert_equal 2, emits.length
249
+
250
+ first = emits[0]
251
+ assert_equal 'foo.bar.test', first[0]
252
+ assert_equal time, first[1]
253
+ assert_equal "xxx:first\tyyy:second", first[2]['data']
254
+ assert_equal 'first', first[2]['xxx']
255
+ assert_equal 'second', first[2]['yyy']
256
+
257
+ second = emits[1]
258
+ assert_equal 'foo.bar.test', second[0]
259
+ assert_equal time, second[1]
260
+ assert_equal "xxx:first\tyyy:second", first[2]['data']
261
+ assert_equal 'first', second[2]['xxx']
262
+ assert_equal 'second2', second[2]['yyy']
263
+ end
192
264
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-12 00:00:00.000000000 Z
12
+ date: 2013-02-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake