fluent-plugin-xml-parser 0.0.8 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +34 -0
- data/fluent-plugin-xml-parser.gemspec +4 -3
- data/lib/fluent/plugin/parser_xml.rb +106 -109
- metadata +29 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c13b08f4ac7b056b6dc68e9bed2e9ebb644826af34701f654575aafa2a836b43
|
4
|
+
data.tar.gz: 5a9c26f558089544bdca1f383ffa1ef5a279a3634d07d3bc9731834a789eb3f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e3763f8a0ce61351b12f943f27278504472fbcf136455c490eed20e467d10e9d2a7f02634a499899c74a79458dca73b3e12d7921ab947db59f92fce48057267
|
7
|
+
data.tar.gz: e6fe60b671b11dc54faa26b05d0af6a54eb91fdec1b65c9b7552ff154262db5292cd0b3635869bb3f9c00565ba184aa87a55b6275ad9bd360771f47e7881ed07
|
data/README.md
CHANGED
@@ -2,6 +2,13 @@
|
|
2
2
|
|
3
3
|
Fluent plugin for Parsing XML Input
|
4
4
|
|
5
|
+
## Requirements
|
6
|
+
|
7
|
+
| fluent-plugin-xml-parser | Fluentd | Ruby |
|
8
|
+
|---------------------------|-------------|--------|
|
9
|
+
| >= 1.0.0 | >= v0.14.0 | >= 2.1 |
|
10
|
+
| < 1.0.0 | >= v0.12.0 | >= 1.9 |
|
11
|
+
|
5
12
|
## Installation
|
6
13
|
|
7
14
|
Add this line to your application's Gemfile:
|
@@ -25,6 +32,33 @@ specified at 'source' directive.
|
|
25
32
|
|
26
33
|
The following is an example configuration for reformatting Libelium SmartCity sensor data, received via the MQTT protocol ([fluent-plugin-mqtt-io](https://github.com/toyokazu/fluent-plugin-mqtt-io)), to fit Elasticsearch.
|
27
34
|
|
35
|
+
### For v1.0
|
36
|
+
|
37
|
+
Put the configuration options inside the `<parse>` tag.
|
38
|
+
|
39
|
+
```
|
40
|
+
|
41
|
+
<source>
|
42
|
+
type mqtt
|
43
|
+
bind 127.0.0.1
|
44
|
+
port 11883
|
45
|
+
topic 'Libelium/+/#'
|
46
|
+
@label @MQTT_OUT
|
47
|
+
<parse>
|
48
|
+
@type xml
|
49
|
+
time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
|
50
|
+
time_key '@timestamp'
|
51
|
+
attr_xpaths '[["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
|
52
|
+
value_xpaths '[["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
|
53
|
+
</parse>
|
54
|
+
</source>
|
55
|
+
|
56
|
+
```
|
57
|
+
|
58
|
+
### For v0.12
|
59
|
+
|
60
|
+
Use `format` instead of `<parse></parse>` and put the configuration options in the `<source>` tag.
|
61
|
+
|
28
62
|
```
|
29
63
|
|
30
64
|
<source>
|
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-xml-parser"
|
7
|
-
spec.version = "0.0.8"
|
7
|
+
spec.version = "1.0.0"
|
8
8
|
spec.authors = ["Toyokazu Akiyama", "Brendan McGrath"]
|
9
9
|
spec.email = ["toyokazu@gmail.com", "brendan@redmandi.com"]
|
10
10
|
spec.summary = %q{fluentd xml parser plugin}
|
@@ -17,8 +17,9 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
18
|
spec.require_paths = ["lib"]
|
19
19
|
|
20
|
-
spec.add_development_dependency "bundler", "
|
20
|
+
spec.add_development_dependency "bundler", ">= 1.5"
|
21
21
|
spec.add_development_dependency "rake"
|
22
|
-
spec.add_development_dependency "fluentd", "< 2"
|
22
|
+
spec.add_development_dependency "fluentd", [">= 0.14.0", "< 2"]
|
23
23
|
|
24
|
+
spec.add_runtime_dependency "fluentd", [">= 0.14.0", "< 2"]
|
24
25
|
end
|
@@ -1,126 +1,123 @@
|
|
1
|
-
require 'fluent/parser'
|
1
|
+
require 'fluent/plugin/parser'
|
2
2
|
require 'rexml/document'
|
3
|
-
module Fluent
|
4
|
-
class TextParser
|
5
|
-
class XmlParser < Parser
|
6
|
-
# Register this parser as "xml"
|
7
|
-
Plugin.register_parser("xml", self)
|
8
3
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
# attr_xpaths '[[null, "description"], ["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
|
14
|
-
# value_xpaths '[["cap:alert/cap:info/cap:description", "text"], ["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
|
15
|
-
#
|
16
|
-
# attr_xpaths indicates attribute name of the target value. Each array with two strings
|
17
|
-
# means xpath of the attribute name and the attribute of the XML element (name, text etc).
|
18
|
-
# XPath can be omitted as 'null' and specify your own attribute name as the second
|
19
|
-
# parameter.
|
20
|
-
#
|
21
|
-
# value_xpaths indicates the target value to be extracted. Each array with two strings
|
22
|
-
# means xpath of the target value and the attribute of the XML element (name, text etc).
|
23
|
-
# XPath can be omitted as 'null' and specify your own value as the second parameter.
|
24
|
-
#
|
25
|
-
# You can check your own XML data structure by using irb or pry
|
26
|
-
#
|
27
|
-
# require 'rexml/document'
|
28
|
-
# doc = REXML::Document.new(open("test.xml"))
|
29
|
-
# doc.elements['cap:alert/cap:info'].children
|
30
|
-
#
|
31
|
-
config_param :time_xpath, :string, :default => nil
|
32
|
-
config_param :time_key, :string, :default => nil
|
33
|
-
config_param :time_format, :string, :default => nil # time_format is configurable
|
34
|
-
config_param :attr_xpaths, :string, :default => '[]'
|
35
|
-
config_param :value_xpaths, :string, :default => '[]'
|
36
|
-
# This method is called after config_params have read configuration parameters
|
37
|
-
def configure(conf)
|
38
|
-
super
|
4
|
+
module Fluent::Plugin
|
5
|
+
class XmlParser < Parser
|
6
|
+
# Register this parser as "xml"
|
7
|
+
Fluent::Plugin.register_parser("xml", self)
|
39
8
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
9
|
+
# How to specify the target attributes and values
|
10
|
+
# The following is an example description for Libelium SmartCity sensor data.
|
11
|
+
#
|
12
|
+
# time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
|
13
|
+
# attr_xpaths '[[null, "description"], ["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
|
14
|
+
# value_xpaths '[["cap:alert/cap:info/cap:description", "text"], ["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
|
15
|
+
#
|
16
|
+
# attr_xpaths indicates attribute name of the target value. Each array with two strings
|
17
|
+
# means xpath of the attribute name and the attribute of the XML element (name, text etc).
|
18
|
+
# XPath can be omitted as 'null' and specify your own attribute name as the second
|
19
|
+
# parameter.
|
20
|
+
#
|
21
|
+
# value_xpaths indicates the target value to be extracted. Each array with two strings
|
22
|
+
# means xpath of the target value and the attribute of the XML element (name, text etc).
|
23
|
+
# XPath can be omitted as 'null' and specify your own value as the second parameter.
|
24
|
+
#
|
25
|
+
# You can check your own XML data structure by using irb or pry
|
26
|
+
#
|
27
|
+
# require 'rexml/document'
|
28
|
+
# doc = REXML::Document.new(open("test.xml"))
|
29
|
+
# doc.elements['cap:alert/cap:info'].children
|
30
|
+
#
|
31
|
+
config_param :time_xpath, :string, :default => nil
|
32
|
+
config_param :time_key, :string, :default => nil
|
33
|
+
config_param :time_format, :string, :default => nil # time_format is configurable
|
34
|
+
config_param :attr_xpaths, :string, :default => '[]'
|
35
|
+
config_param :value_xpaths, :string, :default => '[]'
|
36
|
+
# This method is called after config_params have read configuration parameters
|
37
|
+
def configure(conf)
|
38
|
+
super
|
39
|
+
|
40
|
+
if conf['time_xpath'].nil?
|
41
|
+
@time_xpath = nil
|
42
|
+
else
|
43
|
+
@time_xpath = json_parse(@time_xpath)
|
52
44
|
end
|
45
|
+
@time_parser = Fluent::TimeParser.new(@time_format)
|
46
|
+
@attr_xpaths = json_parse(@attr_xpaths)
|
47
|
+
@value_xpaths = json_parse(@value_xpaths)
|
48
|
+
# TimeParser class is already given. It takes a single argument as the time format
|
49
|
+
# to parse the time string with.
|
50
|
+
end
|
53
51
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
end
|
52
|
+
# This is the main method. The input "text" is the unit of data to be parsed.
|
53
|
+
# If this is the in_tail plugin, it would be a line. If this is for in_syslog,
|
54
|
+
# it is a single syslog message.
|
55
|
+
def parse(text)
|
56
|
+
begin
|
57
|
+
doc = REXML::Document.new(text)
|
58
|
+
$log.debug doc
|
59
|
+
# parse time field
|
60
|
+
if @time_xpath.nil?
|
61
|
+
time = Fluent::Engine.now
|
62
|
+
else
|
63
|
+
time = @time_parser.parse(doc.elements[@time_xpath[0]].method(@time_xpath[1]).call)
|
64
|
+
end
|
65
|
+
record = {}
|
66
|
+
if !@time_key.nil?
|
67
|
+
record = {@time_key => format_time(@time)}
|
68
|
+
end
|
69
|
+
attrs = @attr_xpaths.map do |attr_xpath|
|
70
|
+
if attr_xpath[0].nil? # when null is specified
|
71
|
+
attr_xpath[1] # second parameter is used as the attribute name
|
72
|
+
else # otherwise, the target attribute name is extracted from XML
|
73
|
+
el = doc.elements[attr_xpath[0]]
|
74
|
+
unless el.nil? and attr_xpath.size > 2
|
75
|
+
el.method(attr_xpath[1]).call
|
76
|
+
else # unless it's not in the XML and we have a third parameter
|
77
|
+
attr_xpath[2] # then the third parameter is used as the target value
|
81
78
|
end
|
82
79
|
end
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
80
|
+
end
|
81
|
+
values = @value_xpaths.map do |value_xpath|
|
82
|
+
if value_xpath[0].nil? # when null is specified
|
83
|
+
value_xpath[1] # second parameter is used as the target value
|
84
|
+
else # otherwise, the target value is extracted from XML
|
85
|
+
el = doc.elements[value_xpath[0]]
|
86
|
+
unless el.nil? and value_xpath.size > 2
|
87
|
+
el.method(value_xpath[1]).call
|
88
|
+
else # unless it's not in the XML and we have a third parameter
|
89
|
+
value_xpath[2] # then the third parameter is used as the target value
|
93
90
|
end
|
94
91
|
end
|
95
|
-
attrs.size.times do |i|
|
96
|
-
record[attrs[i]] = values[i]
|
97
|
-
end
|
98
|
-
yield @time, record
|
99
|
-
rescue REXML::ParseException => e
|
100
|
-
$log.warn "Parse error", :error => e.to_s
|
101
|
-
$log.debug_backtrace(e.backtrace)
|
102
|
-
rescue Exception => e
|
103
|
-
$log.warn "error", :error => e.to_s
|
104
|
-
$log.debug_backtrace(e.backtrace)
|
105
92
|
end
|
93
|
+
attrs.size.times do |i|
|
94
|
+
record[attrs[i]] = values[i]
|
95
|
+
end
|
96
|
+
yield time, record
|
97
|
+
rescue REXML::ParseException => e
|
98
|
+
$log.warn "Parse error", :error => e.to_s
|
99
|
+
$log.debug_backtrace(e.backtrace)
|
100
|
+
rescue Exception => e
|
101
|
+
$log.warn "error", :error => e.to_s
|
102
|
+
$log.debug_backtrace(e.backtrace)
|
106
103
|
end
|
104
|
+
end
|
107
105
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
end
|
106
|
+
def format_time(time)
|
107
|
+
if @time_format.nil?
|
108
|
+
Time.at(time).iso8601
|
109
|
+
else
|
110
|
+
Time.at(time).strftime(@time_format)
|
114
111
|
end
|
112
|
+
end
|
115
113
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
end
|
114
|
+
def json_parse message
|
115
|
+
begin
|
116
|
+
y = Yajl::Parser.new
|
117
|
+
y.parse(message)
|
118
|
+
rescue
|
119
|
+
$log.error "JSON parse error", :error => $!.to_s, :error_class => $!.class.to_s
|
120
|
+
$log.warn_backtrace $!.backtrace
|
124
121
|
end
|
125
122
|
end
|
126
123
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-xml-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Toyokazu Akiyama
|
@@ -15,14 +15,14 @@ dependencies:
|
|
15
15
|
name: bundler
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- - "
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
20
|
version: '1.5'
|
21
21
|
type: :development
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- - "
|
25
|
+
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '1.5'
|
28
28
|
- !ruby/object:Gem::Dependency
|
@@ -43,6 +43,9 @@ dependencies:
|
|
43
43
|
name: fluentd
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 0.14.0
|
46
49
|
- - "<"
|
47
50
|
- !ruby/object:Gem::Version
|
48
51
|
version: '2'
|
@@ -50,6 +53,29 @@ dependencies:
|
|
50
53
|
prerelease: false
|
51
54
|
version_requirements: !ruby/object:Gem::Requirement
|
52
55
|
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: 0.14.0
|
59
|
+
- - "<"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: fluentd
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.14.0
|
69
|
+
- - "<"
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '2'
|
72
|
+
type: :runtime
|
73
|
+
prerelease: false
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: 0.14.0
|
53
79
|
- - "<"
|
54
80
|
- !ruby/object:Gem::Version
|
55
81
|
version: '2'
|