fluent-plugin-xml-parser 0.0.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +34 -0
- data/fluent-plugin-xml-parser.gemspec +4 -3
- data/lib/fluent/plugin/parser_xml.rb +106 -109
- metadata +29 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c13b08f4ac7b056b6dc68e9bed2e9ebb644826af34701f654575aafa2a836b43
|
4
|
+
data.tar.gz: 5a9c26f558089544bdca1f383ffa1ef5a279a3634d07d3bc9731834a789eb3f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e3763f8a0ce61351b12f943f27278504472fbcf136455c490eed20e467d10e9d2a7f02634a499899c74a79458dca73b3e12d7921ab947db59f92fce48057267
|
7
|
+
data.tar.gz: e6fe60b671b11dc54faa26b05d0af6a54eb91fdec1b65c9b7552ff154262db5292cd0b3635869bb3f9c00565ba184aa87a55b6275ad9bd360771f47e7881ed07
|
data/README.md
CHANGED
@@ -2,6 +2,13 @@
|
|
2
2
|
|
3
3
|
Fluent plugin for Parsing XML Input
|
4
4
|
|
5
|
+
## Requirements
|
6
|
+
|
7
|
+
| fluent-plugin-xml-parser | Fluentd | Ruby |
|
8
|
+
|---------------------------|-------------|--------|
|
9
|
+
| >= 1.0.0 | >= v0.14.0 | >= 2.1 |
|
10
|
+
| < 1.0.0 | >= v0.12.0 | >= 1.9 |
|
11
|
+
|
5
12
|
## Installation
|
6
13
|
|
7
14
|
Add this line to your application's Gemfile:
|
@@ -25,6 +32,33 @@ specified at 'source' directive.
|
|
25
32
|
|
26
33
|
The following is an example configuration for reformatting Libelium SmartCity sensor data, received via the MQTT protocol ([fluent-plugin-mqtt-io](https://github.com/toyokazu/fluent-plugin-mqtt-io)), to fit Elasticsearch.
|
27
34
|
|
35
|
+
### For v1.0
|
36
|
+
|
37
|
+
Put the configuration options inside the `<parse>` section.
|
38
|
+
|
39
|
+
```
|
40
|
+
|
41
|
+
<source>
|
42
|
+
type mqtt
|
43
|
+
bind 127.0.0.1
|
44
|
+
port 11883
|
45
|
+
topic 'Libelium/+/#'
|
46
|
+
@label @MQTT_OUT
|
47
|
+
<parse>
|
48
|
+
@type xml
|
49
|
+
time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
|
50
|
+
time_key '@timestamp'
|
51
|
+
attr_xpaths '[["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
|
52
|
+
value_xpaths '[["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
|
53
|
+
</parse>
|
54
|
+
</source>
|
55
|
+
|
56
|
+
```
|
57
|
+
|
58
|
+
### For v0.12
|
59
|
+
|
60
|
+
Use `format` instead of `<parse></parse>` and put the configuration options directly in the `<source>` section.
|
61
|
+
|
28
62
|
```
|
29
63
|
|
30
64
|
<source>
|
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-xml-parser"
|
7
|
-
spec.version = "0.0
|
7
|
+
spec.version = "1.0.0"
|
8
8
|
spec.authors = ["Toyokazu Akiyama", "Brendan McGrath"]
|
9
9
|
spec.email = ["toyokazu@gmail.com", "brendan@redmandi.com"]
|
10
10
|
spec.summary = %q{fluentd xml parser plugin}
|
@@ -17,8 +17,9 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
18
|
spec.require_paths = ["lib"]
|
19
19
|
|
20
|
-
spec.add_development_dependency "bundler", "
|
20
|
+
spec.add_development_dependency "bundler", ">= 1.5"
|
21
21
|
spec.add_development_dependency "rake"
|
22
|
-
spec.add_development_dependency "fluentd", "< 2"
|
22
|
+
spec.add_development_dependency "fluentd", [">= 0.14.0", "< 2"]
|
23
23
|
|
24
|
+
spec.add_runtime_dependency "fluentd", [">= 0.14.0", "< 2"]
|
24
25
|
end
|
@@ -1,126 +1,123 @@
|
|
1
|
-
require 'fluent/parser'
|
1
|
+
require 'fluent/plugin/parser'
|
2
2
|
require 'rexml/document'
|
3
|
-
module Fluent
|
4
|
-
class TextParser
|
5
|
-
class XmlParser < Parser
|
6
|
-
# Register this parser as "xml"
|
7
|
-
Plugin.register_parser("xml", self)
|
8
3
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
# attr_xpaths '[[null, "description"], ["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
|
14
|
-
# value_xpaths '[["cap:alert/cap:info/cap:description", "text"], ["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
|
15
|
-
#
|
16
|
-
# attr_xpaths indicates attribute name of the target value. Each array with two strings
|
17
|
-
# means xpath of the attribute name and the attribute of the XML element (name, text etc).
|
18
|
-
# XPath can be omitted as 'null' and specify your own attribute name as the second
|
19
|
-
# parameter.
|
20
|
-
#
|
21
|
-
# value_xpaths indicates the target value to be extracted. Each array with two strings
|
22
|
-
# means xpath of the target value and the attribute of the XML element (name, text etc).
|
23
|
-
# XPath can be omitted as 'null' and specify your own value as the second parameter.
|
24
|
-
#
|
25
|
-
# You can check your own XML data structure by using irb or pry
|
26
|
-
#
|
27
|
-
# require 'rexml/document'
|
28
|
-
# doc = REXML::Document.new(open("test.xml"))
|
29
|
-
# doc.elements['cap:alert/cap:info'].children
|
30
|
-
#
|
31
|
-
config_param :time_xpath, :string, :default => nil
|
32
|
-
config_param :time_key, :string, :default => nil
|
33
|
-
config_param :time_format, :string, :default => nil # time_format is configurable
|
34
|
-
config_param :attr_xpaths, :string, :default => '[]'
|
35
|
-
config_param :value_xpaths, :string, :default => '[]'
|
36
|
-
# This method is called after config_params have read configuration parameters
|
37
|
-
def configure(conf)
|
38
|
-
super
|
4
|
+
module Fluent::Plugin
|
5
|
+
class XmlParser < Parser
|
6
|
+
# Register this parser as "xml"
|
7
|
+
Fluent::Plugin.register_parser("xml", self)
|
39
8
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
9
|
+
# How to specify the target attributes and values
|
10
|
+
# The following is an example description for Libelium SmartCity sensor data.
|
11
|
+
#
|
12
|
+
# time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
|
13
|
+
# attr_xpaths '[[null, "description"], ["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
|
14
|
+
# value_xpaths '[["cap:alert/cap:info/cap:description", "text"], ["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
|
15
|
+
#
|
16
|
+
# attr_xpaths indicates attribute name of the target value. Each array with two strings
|
17
|
+
# means xpath of the attribute name and the attribute of the XML element (name, text etc).
|
18
|
+
# XPath can be omitted as 'null' and specify your own attribute name as the second
|
19
|
+
# parameter.
|
20
|
+
#
|
21
|
+
# value_xpaths indicates the target value to be extracted. Each array with two strings
|
22
|
+
# means xpath of the target value and the attribute of the XML element (name, text etc).
|
23
|
+
# XPath can be omitted as 'null' and specify your own value as the second parameter.
|
24
|
+
#
|
25
|
+
# You can check your own XML data structure by using irb or pry
|
26
|
+
#
|
27
|
+
# require 'rexml/document'
|
28
|
+
# doc = REXML::Document.new(open("test.xml"))
|
29
|
+
# doc.elements['cap:alert/cap:info'].children
|
30
|
+
#
|
31
|
+
config_param :time_xpath, :string, :default => nil
|
32
|
+
config_param :time_key, :string, :default => nil
|
33
|
+
config_param :time_format, :string, :default => nil # time_format is configurable
|
34
|
+
config_param :attr_xpaths, :string, :default => '[]'
|
35
|
+
config_param :value_xpaths, :string, :default => '[]'
|
36
|
+
# This method is called after config_params have read configuration parameters
|
37
|
+
def configure(conf)
|
38
|
+
super
|
39
|
+
|
40
|
+
if conf['time_xpath'].nil?
|
41
|
+
@time_xpath = nil
|
42
|
+
else
|
43
|
+
@time_xpath = json_parse(@time_xpath)
|
52
44
|
end
|
45
|
+
@time_parser = Fluent::TimeParser.new(@time_format)
|
46
|
+
@attr_xpaths = json_parse(@attr_xpaths)
|
47
|
+
@value_xpaths = json_parse(@value_xpaths)
|
48
|
+
# TimeParser class is already given. It takes a single argument as the time format
|
49
|
+
# to parse the time string with.
|
50
|
+
end
|
53
51
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
end
|
52
|
+
# This is the main method. The input "text" is the unit of data to be parsed.
|
53
|
+
# If this is the in_tail plugin, it would be a line. If this is for in_syslog,
|
54
|
+
# it is a single syslog message.
|
55
|
+
def parse(text)
|
56
|
+
begin
|
57
|
+
doc = REXML::Document.new(text)
|
58
|
+
$log.debug doc
|
59
|
+
# parse time field
|
60
|
+
if @time_xpath.nil?
|
61
|
+
time = Fluent::Engine.now
|
62
|
+
else
|
63
|
+
time = @time_parser.parse(doc.elements[@time_xpath[0]].method(@time_xpath[1]).call)
|
64
|
+
end
|
65
|
+
record = {}
|
66
|
+
if !@time_key.nil?
|
67
|
+
record = {@time_key => format_time(@time)}
|
68
|
+
end
|
69
|
+
attrs = @attr_xpaths.map do |attr_xpath|
|
70
|
+
if attr_xpath[0].nil? # when null is specified
|
71
|
+
attr_xpath[1] # second parameter is used as the attribute name
|
72
|
+
else # otherwise, the target attribute name is extracted from XML
|
73
|
+
el = doc.elements[attr_xpath[0]]
|
74
|
+
unless el.nil? and attr_xpath.size > 2
|
75
|
+
el.method(attr_xpath[1]).call
|
76
|
+
else # unless it's not in the XML and we have a third parameter
|
77
|
+
attr_xpath[2] # then the third parameter is used as the target value
|
81
78
|
end
|
82
79
|
end
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
80
|
+
end
|
81
|
+
values = @value_xpaths.map do |value_xpath|
|
82
|
+
if value_xpath[0].nil? # when null is specified
|
83
|
+
value_xpath[1] # second parameter is used as the target value
|
84
|
+
else # otherwise, the target value is extracted from XML
|
85
|
+
el = doc.elements[value_xpath[0]]
|
86
|
+
unless el.nil? and value_xpath.size > 2
|
87
|
+
el.method(value_xpath[1]).call
|
88
|
+
else # unless it's not in the XML and we have a third parameter
|
89
|
+
value_xpath[2] # then the third parameter is used as the target value
|
93
90
|
end
|
94
91
|
end
|
95
|
-
attrs.size.times do |i|
|
96
|
-
record[attrs[i]] = values[i]
|
97
|
-
end
|
98
|
-
yield @time, record
|
99
|
-
rescue REXML::ParseException => e
|
100
|
-
$log.warn "Parse error", :error => e.to_s
|
101
|
-
$log.debug_backtrace(e.backtrace)
|
102
|
-
rescue Exception => e
|
103
|
-
$log.warn "error", :error => e.to_s
|
104
|
-
$log.debug_backtrace(e.backtrace)
|
105
92
|
end
|
93
|
+
attrs.size.times do |i|
|
94
|
+
record[attrs[i]] = values[i]
|
95
|
+
end
|
96
|
+
yield time, record
|
97
|
+
rescue REXML::ParseException => e
|
98
|
+
$log.warn "Parse error", :error => e.to_s
|
99
|
+
$log.debug_backtrace(e.backtrace)
|
100
|
+
rescue Exception => e
|
101
|
+
$log.warn "error", :error => e.to_s
|
102
|
+
$log.debug_backtrace(e.backtrace)
|
106
103
|
end
|
104
|
+
end
|
107
105
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
end
|
106
|
+
def format_time(time)
|
107
|
+
if @time_format.nil?
|
108
|
+
Time.at(time).iso8601
|
109
|
+
else
|
110
|
+
Time.at(time).strftime(@time_format)
|
114
111
|
end
|
112
|
+
end
|
115
113
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
end
|
114
|
+
def json_parse message
|
115
|
+
begin
|
116
|
+
y = Yajl::Parser.new
|
117
|
+
y.parse(message)
|
118
|
+
rescue
|
119
|
+
$log.error "JSON parse error", :error => $!.to_s, :error_class => $!.class.to_s
|
120
|
+
$log.warn_backtrace $!.backtrace
|
124
121
|
end
|
125
122
|
end
|
126
123
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-xml-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Toyokazu Akiyama
|
@@ -15,14 +15,14 @@ dependencies:
|
|
15
15
|
name: bundler
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- - "
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
20
|
version: '1.5'
|
21
21
|
type: :development
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- - "
|
25
|
+
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '1.5'
|
28
28
|
- !ruby/object:Gem::Dependency
|
@@ -43,6 +43,9 @@ dependencies:
|
|
43
43
|
name: fluentd
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 0.14.0
|
46
49
|
- - "<"
|
47
50
|
- !ruby/object:Gem::Version
|
48
51
|
version: '2'
|
@@ -50,6 +53,29 @@ dependencies:
|
|
50
53
|
prerelease: false
|
51
54
|
version_requirements: !ruby/object:Gem::Requirement
|
52
55
|
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: 0.14.0
|
59
|
+
- - "<"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: fluentd
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.14.0
|
69
|
+
- - "<"
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '2'
|
72
|
+
type: :runtime
|
73
|
+
prerelease: false
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: 0.14.0
|
53
79
|
- - "<"
|
54
80
|
- !ruby/object:Gem::Version
|
55
81
|
version: '2'
|