fluent-plugin-xml-parser 0.0.8 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ed5cb30a4cfc986555ae39795e557a75d5df15af19697f5bc215f42febb12e58
4
- data.tar.gz: b9ec96aac968f460f0800a40da1d526280cf68dc7ec66fe9a77ad6b9499d070b
3
+ metadata.gz: c13b08f4ac7b056b6dc68e9bed2e9ebb644826af34701f654575aafa2a836b43
4
+ data.tar.gz: 5a9c26f558089544bdca1f383ffa1ef5a279a3634d07d3bc9731834a789eb3f6
5
5
  SHA512:
6
- metadata.gz: 4c3b6f464b7c11b775c949704b2f85006ee5490dc3bb0069b24cdb9d08fa4d9a4f40908da61f9d8992de0fedbd295ce4ab1fc6c6ef1c49f0d66ab5e35e0a812d
7
- data.tar.gz: c63e6a37fec38d45a889c784a4e2dd654e2fb711aa14356b8a31f8c40b90afd6adcaa21a12784a4caaebc1c41b85bb6ebbb8cb4f10dffd4e56bc417da0ae781a
6
+ metadata.gz: 7e3763f8a0ce61351b12f943f27278504472fbcf136455c490eed20e467d10e9d2a7f02634a499899c74a79458dca73b3e12d7921ab947db59f92fce48057267
7
+ data.tar.gz: e6fe60b671b11dc54faa26b05d0af6a54eb91fdec1b65c9b7552ff154262db5292cd0b3635869bb3f9c00565ba184aa87a55b6275ad9bd360771f47e7881ed07
data/README.md CHANGED
@@ -2,6 +2,13 @@
2
2
 
3
3
  Fluent plugin for Parsing XML Input
4
4
 
5
+ ## Requirements
6
+
7
+ | fluent-plugin-xml-parser | Fluentd | Ruby |
8
+ |---------------------------|-------------|--------|
9
+ | >= 1.0.0 | >= v0.14.0 | >= 2.1 |
10
+ | < 1.0.0 | >= v0.12.0 | >= 1.9 |
11
+
5
12
  ## Installation
6
13
 
7
14
  Add this line to your application's Gemfile:
@@ -25,6 +32,33 @@ specified at 'source' directive.
25
32
 
26
33
  The following is an example configuration for reformatting Libelium SmartCity sensor data, received via the MQTT protocol ([fluent-plugin-mqtt-io](https://github.com/toyokazu/fluent-plugin-mqtt-io)), to fit Elasticsearch.
27
34
 
35
+ ### For v1.0
36
+
37
+ Put configuration options in the `<parse>` tag
38
+
39
+ ```
40
+
41
+ <source>
42
+ type mqtt
43
+ bind 127.0.0.1
44
+ port 11883
45
+ topic 'Libelium/+/#'
46
+ @label @MQTT_OUT
47
+ <parse>
48
+ @type xml
49
+ time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
50
+ time_key '@timestamp'
51
+ attr_xpaths '[["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
52
+ value_xpaths '[["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
53
+ </parse>
54
+ </source>
55
+
56
+ ```
57
+
58
+ ### For v0.12
59
+
60
+ Use `format` instead of `<parse></parse>` and put configuration options in the `<source>` tag
61
+
28
62
  ```
29
63
 
30
64
  <source>
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-xml-parser"
7
- spec.version = "0.0.8"
7
+ spec.version = "1.0.0"
8
8
  spec.authors = ["Toyokazu Akiyama", "Brendan McGrath"]
9
9
  spec.email = ["toyokazu@gmail.com", "brendan@redmandi.com"]
10
10
  spec.summary = %q{fluentd xml parser plugin}
@@ -17,8 +17,9 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
- spec.add_development_dependency "bundler", "~> 1.5"
20
+ spec.add_development_dependency "bundler", ">= 1.5"
21
21
  spec.add_development_dependency "rake"
22
- spec.add_development_dependency "fluentd", "< 2"
22
+ spec.add_development_dependency "fluentd", [">= 0.14.0", "< 2"]
23
23
 
24
+ spec.add_runtime_dependency "fluentd", [">= 0.14.0", "< 2"]
24
25
  end
@@ -1,126 +1,123 @@
1
- require 'fluent/parser'
1
+ require 'fluent/plugin/parser'
2
2
  require 'rexml/document'
3
- module Fluent
4
- class TextParser
5
- class XmlParser < Parser
6
- # Register this parser as "xml"
7
- Plugin.register_parser("xml", self)
8
3
 
9
- # How to specify the target attributes and values
10
- # The followings are an example description for Libelium SmartCity sensor data.
11
- #
12
- # time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
13
- # attr_xpaths '[[null, "description"], ["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
14
- # value_xpaths '[["cap:alert/cap:info/cap:description", "text"], ["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
15
- #
16
- # attr_xpaths indicates attribute name of the target value. Each array with two strings
17
- # means xpath of the attribute name and the attribute of the XML element (name, text etc).
18
- # XPath can be omitted as 'null' and specify your own attribute name as the second
19
- # parameter.
20
- #
21
- # value_xpaths indicates the target value to be extracted. Each array with two strings
22
- # means xpath of the target value and the attribute of the XML element (name, text etc).
23
- # XPath can be omitted as 'null' and specify your own value as the second parameter.
24
- #
25
- # You can check your own XML data structure by using irb or pry
26
- #
27
- # require 'rexml/document'
28
- # doc = REXML::Document.new(open("test.xml"))
29
- # doc.elements['cap:alert/cap:info'].children
30
- #
31
- config_param :time_xpath, :string, :default => nil
32
- config_param :time_key, :string, :default => nil
33
- config_param :time_format, :string, :default => nil # time_format is configurable
34
- config_param :attr_xpaths, :string, :default => '[]'
35
- config_param :value_xpaths, :string, :default => '[]'
36
- # This method is called after config_params have read configuration parameters
37
- def configure(conf)
38
- super
4
+ module Fluent::Plugin
5
+ class XmlParser < Parser
6
+ # Register this parser as "xml"
7
+ Fluent::Plugin.register_parser("xml", self)
39
8
 
40
- if conf['time_xpath'].nil?
41
- @time_xpath = nil
42
- else
43
- @time_xpath = json_parse(conf['time_xpath'])
44
- end
45
- @time_key = conf['time_key']
46
- @time_format = conf['time_format']
47
- @time_parser = TimeParser.new(@time_format)
48
- @attr_xpaths = json_parse(conf['attr_xpaths'])
49
- @value_xpaths = json_parse(conf['value_xpaths'])
50
- # TimeParser class is already given. It takes a single argument as the time format
51
- # to parse the time string with.
9
+ # How to specify the target attributes and values
10
+ # The followings are an example description for Libelium SmartCity sensor data.
11
+ #
12
+ # time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
13
+ # attr_xpaths '[[null, "description"], ["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
14
+ # value_xpaths '[["cap:alert/cap:info/cap:description", "text"], ["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
15
+ #
16
+ # attr_xpaths indicates attribute name of the target value. Each array with two strings
17
+ # means xpath of the attribute name and the attribute of the XML element (name, text etc).
18
+ # XPath can be omitted as 'null' and specify your own attribute name as the second
19
+ # parameter.
20
+ #
21
+ # value_xpaths indicates the target value to be extracted. Each array with two strings
22
+ # means xpath of the target value and the attribute of the XML element (name, text etc).
23
+ # XPath can be omitted as 'null' and specify your own value as the second parameter.
24
+ #
25
+ # You can check your own XML data structure by using irb or pry
26
+ #
27
+ # require 'rexml/document'
28
+ # doc = REXML::Document.new(open("test.xml"))
29
+ # doc.elements['cap:alert/cap:info'].children
30
+ #
31
+ config_param :time_xpath, :string, :default => nil
32
+ config_param :time_key, :string, :default => nil
33
+ config_param :time_format, :string, :default => nil # time_format is configurable
34
+ config_param :attr_xpaths, :string, :default => '[]'
35
+ config_param :value_xpaths, :string, :default => '[]'
36
+ # This method is called after config_params have read configuration parameters
37
+ def configure(conf)
38
+ super
39
+
40
+ if conf['time_xpath'].nil?
41
+ @time_xpath = nil
42
+ else
43
+ @time_xpath = json_parse(@time_xpath)
52
44
  end
45
+ @time_parser = Fluent::TimeParser.new(@time_format)
46
+ @attr_xpaths = json_parse(@attr_xpaths)
47
+ @value_xpaths = json_parse(@value_xpaths)
48
+ # TimeParser class is already given. It takes a single argument as the time format
49
+ # to parse the time string with.
50
+ end
53
51
 
54
- # This is the main method. The input "text" is the unit of data to be parsed.
55
- # If this is the in_tail plugin, it would be a line. If this is for in_syslog,
56
- # it is a single syslog message.
57
- def parse(text)
58
- begin
59
- doc = REXML::Document.new(text)
60
- $log.debug doc
61
- # parse time field
62
- if @time_xpath.nil?
63
- @time = Fluent::Engine.now
64
- else
65
- @time = @time_parser.parse(doc.elements[@time_xpath[0]].method(@time_xpath[1]).call)
66
- end
67
- record = {}
68
- if !@time_key.nil?
69
- record = {@time_key => format_time(@time)}
70
- end
71
- attrs = @attr_xpaths.map do |attr_xpath|
72
- if attr_xpath[0].nil? # when null is specified
73
- attr_xpath[1] # second parameter is used as the attribute name
74
- else # otherwise, the target attribute name is extracted from XML
75
- el = doc.elements[attr_xpath[0]]
76
- unless el.nil? and attr_xpath.size > 2
77
- el.method(attr_xpath[1]).call
78
- else # unless it's not in the XML and we have a third parameter
79
- attr_xpath[2] # then the third parameter is used as the target value
80
- end
52
+ # This is the main method. The input "text" is the unit of data to be parsed.
53
+ # If this is the in_tail plugin, it would be a line. If this is for in_syslog,
54
+ # it is a single syslog message.
55
+ def parse(text)
56
+ begin
57
+ doc = REXML::Document.new(text)
58
+ $log.debug doc
59
+ # parse time field
60
+ if @time_xpath.nil?
61
+ time = Fluent::Engine.now
62
+ else
63
+ time = @time_parser.parse(doc.elements[@time_xpath[0]].method(@time_xpath[1]).call)
64
+ end
65
+ record = {}
66
+ if !@time_key.nil?
67
+ record = {@time_key => format_time(@time)}
68
+ end
69
+ attrs = @attr_xpaths.map do |attr_xpath|
70
+ if attr_xpath[0].nil? # when null is specified
71
+ attr_xpath[1] # second parameter is used as the attribute name
72
+ else # otherwise, the target attribute name is extracted from XML
73
+ el = doc.elements[attr_xpath[0]]
74
+ unless el.nil? and attr_xpath.size > 2
75
+ el.method(attr_xpath[1]).call
76
+ else # unless it's not in the XML and we have a third parameter
77
+ attr_xpath[2] # then the third parameter is used as the target value
81
78
  end
82
79
  end
83
- values = @value_xpaths.map do |value_xpath|
84
- if value_xpath[0].nil? # when null is specified
85
- value_xpath[1] # second parameter is used as the target value
86
- else # otherwise, the target value is extracted from XML
87
- el = doc.elements[value_xpath[0]]
88
- unless el.nil? and value_xpath.size > 2
89
- el.method(value_xpath[1]).call
90
- else # unless it's not in the XML and we have a third parameter
91
- value_xpath[2] # then the third parameter is used as the target value
92
- end
80
+ end
81
+ values = @value_xpaths.map do |value_xpath|
82
+ if value_xpath[0].nil? # when null is specified
83
+ value_xpath[1] # second parameter is used as the target value
84
+ else # otherwise, the target value is extracted from XML
85
+ el = doc.elements[value_xpath[0]]
86
+ unless el.nil? and value_xpath.size > 2
87
+ el.method(value_xpath[1]).call
88
+ else # unless it's not in the XML and we have a third parameter
89
+ value_xpath[2] # then the third parameter is used as the target value
93
90
  end
94
91
  end
95
- attrs.size.times do |i|
96
- record[attrs[i]] = values[i]
97
- end
98
- yield @time, record
99
- rescue REXML::ParseException => e
100
- $log.warn "Parse error", :error => e.to_s
101
- $log.debug_backtrace(e.backtrace)
102
- rescue Exception => e
103
- $log.warn "error", :error => e.to_s
104
- $log.debug_backtrace(e.backtrace)
105
92
  end
93
+ attrs.size.times do |i|
94
+ record[attrs[i]] = values[i]
95
+ end
96
+ yield time, record
97
+ rescue REXML::ParseException => e
98
+ $log.warn "Parse error", :error => e.to_s
99
+ $log.debug_backtrace(e.backtrace)
100
+ rescue Exception => e
101
+ $log.warn "error", :error => e.to_s
102
+ $log.debug_backtrace(e.backtrace)
106
103
  end
104
+ end
107
105
 
108
- def format_time(time)
109
- if @time_format.nil?
110
- Time.at(time).iso8601
111
- else
112
- Time.at(time).strftime(@time_format)
113
- end
106
+ def format_time(time)
107
+ if @time_format.nil?
108
+ Time.at(time).iso8601
109
+ else
110
+ Time.at(time).strftime(@time_format)
114
111
  end
112
+ end
115
113
 
116
- def json_parse message
117
- begin
118
- y = Yajl::Parser.new
119
- y.parse(message)
120
- rescue
121
- $log.error "JSON parse error", :error => $!.to_s, :error_class => $!.class.to_s
122
- $log.warn_backtrace $!.backtrace
123
- end
114
+ def json_parse message
115
+ begin
116
+ y = Yajl::Parser.new
117
+ y.parse(message)
118
+ rescue
119
+ $log.error "JSON parse error", :error => $!.to_s, :error_class => $!.class.to_s
120
+ $log.warn_backtrace $!.backtrace
124
121
  end
125
122
  end
126
123
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-xml-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Toyokazu Akiyama
@@ -15,14 +15,14 @@ dependencies:
15
15
  name: bundler
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - "~>"
18
+ - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '1.5'
21
21
  type: :development
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - "~>"
25
+ - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: '1.5'
28
28
  - !ruby/object:Gem::Dependency
@@ -43,6 +43,9 @@ dependencies:
43
43
  name: fluentd
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: 0.14.0
46
49
  - - "<"
47
50
  - !ruby/object:Gem::Version
48
51
  version: '2'
@@ -50,6 +53,29 @@ dependencies:
50
53
  prerelease: false
51
54
  version_requirements: !ruby/object:Gem::Requirement
52
55
  requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: 0.14.0
59
+ - - "<"
60
+ - !ruby/object:Gem::Version
61
+ version: '2'
62
+ - !ruby/object:Gem::Dependency
63
+ name: fluentd
64
+ requirement: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 0.14.0
69
+ - - "<"
70
+ - !ruby/object:Gem::Version
71
+ version: '2'
72
+ type: :runtime
73
+ prerelease: false
74
+ version_requirements: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: 0.14.0
53
79
  - - "<"
54
80
  - !ruby/object:Gem::Version
55
81
  version: '2'