fluent-plugin-xml-parser 0.0.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ed5cb30a4cfc986555ae39795e557a75d5df15af19697f5bc215f42febb12e58
4
- data.tar.gz: b9ec96aac968f460f0800a40da1d526280cf68dc7ec66fe9a77ad6b9499d070b
3
+ metadata.gz: c13b08f4ac7b056b6dc68e9bed2e9ebb644826af34701f654575aafa2a836b43
4
+ data.tar.gz: 5a9c26f558089544bdca1f383ffa1ef5a279a3634d07d3bc9731834a789eb3f6
5
5
  SHA512:
6
- metadata.gz: 4c3b6f464b7c11b775c949704b2f85006ee5490dc3bb0069b24cdb9d08fa4d9a4f40908da61f9d8992de0fedbd295ce4ab1fc6c6ef1c49f0d66ab5e35e0a812d
7
- data.tar.gz: c63e6a37fec38d45a889c784a4e2dd654e2fb711aa14356b8a31f8c40b90afd6adcaa21a12784a4caaebc1c41b85bb6ebbb8cb4f10dffd4e56bc417da0ae781a
6
+ metadata.gz: 7e3763f8a0ce61351b12f943f27278504472fbcf136455c490eed20e467d10e9d2a7f02634a499899c74a79458dca73b3e12d7921ab947db59f92fce48057267
7
+ data.tar.gz: e6fe60b671b11dc54faa26b05d0af6a54eb91fdec1b65c9b7552ff154262db5292cd0b3635869bb3f9c00565ba184aa87a55b6275ad9bd360771f47e7881ed07
data/README.md CHANGED
@@ -2,6 +2,13 @@
2
2
 
3
3
  Fluent plugin for Parsing XML Input
4
4
 
5
+ ## Requirements
6
+
7
+ | fluent-plugin-xml-parser | Fluentd | Ruby |
8
+ |---------------------------|-------------|--------|
9
+ | >= 1.0.0 | >= v0.14.0 | >= 2.1 |
10
+ | < 1.0.0 | >= v0.12.0 | >= 1.9 |
11
+
5
12
  ## Installation
6
13
 
7
14
  Add this line to your application's Gemfile:
@@ -25,6 +32,33 @@ specified at 'source' directive.
25
32
 
26
33
  The following is an example configuration that reformats Libelium SmartCity sensor data, received via the MQTT protocol ([fluent-plugin-mqtt-io](https://github.com/toyokazu/fluent-plugin-mqtt-io)), to fit Elasticsearch.
27
34
 
35
+ ### For v1.0
36
+
37
+ Put the configuration options in the `<parse>` tag
38
+
39
+ ```
40
+
41
+ <source>
42
+ type mqtt
43
+ bind 127.0.0.1
44
+ port 11883
45
+ topic 'Libelium/+/#'
46
+ @label @MQTT_OUT
47
+ <parse>
48
+ @type xml
49
+ time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
50
+ time_key '@timestamp'
51
+ attr_xpaths '[["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
52
+ value_xpaths '[["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
53
+ </parse>
54
+ </source>
55
+
56
+ ```
57
+
58
+ ### For v0.12
59
+
60
+ Use `format` instead of `<parse></parse>` and put the configuration options in the `<source>` tag
61
+
28
62
  ```
29
63
 
30
64
  <source>
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-xml-parser"
7
- spec.version = "0.0.8"
7
+ spec.version = "1.0.0"
8
8
  spec.authors = ["Toyokazu Akiyama", "Brendan McGrath"]
9
9
  spec.email = ["toyokazu@gmail.com", "brendan@redmandi.com"]
10
10
  spec.summary = %q{fluentd xml parser plugin}
@@ -17,8 +17,9 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
- spec.add_development_dependency "bundler", "~> 1.5"
20
+ spec.add_development_dependency "bundler", ">= 1.5"
21
21
  spec.add_development_dependency "rake"
22
- spec.add_development_dependency "fluentd", "< 2"
22
+ spec.add_development_dependency "fluentd", [">= 0.14.0", "< 2"]
23
23
 
24
+ spec.add_runtime_dependency "fluentd", [">= 0.14.0", "< 2"]
24
25
  end
@@ -1,126 +1,123 @@
1
- require 'fluent/parser'
1
+ require 'fluent/plugin/parser'
2
2
  require 'rexml/document'
3
- module Fluent
4
- class TextParser
5
- class XmlParser < Parser
6
- # Register this parser as "xml"
7
- Plugin.register_parser("xml", self)
8
3
 
9
- # How to specify the target attributes and values
10
- # The followings are an example description for Libelium SmartCity sensor data.
11
- #
12
- # time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
13
- # attr_xpaths '[[null, "description"], ["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
14
- # value_xpaths '[["cap:alert/cap:info/cap:description", "text"], ["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
15
- #
16
- # attr_xpaths indicates attribute name of the target value. Each array with two strings
17
- # means xpath of the attribute name and the attribute of the XML element (name, text etc).
18
- # XPath can be omitted as 'null' and specify your own attribute name as the second
19
- # parameter.
20
- #
21
- # value_xpaths indicates the target value to be extracted. Each array with two strings
22
- # means xpath of the target value and the attribute of the XML element (name, text etc).
23
- # XPath can be omitted as 'null' and specify your own value as the second parameter.
24
- #
25
- # You can check your own XML data structure by using irb or pry
26
- #
27
- # require 'rexml/document'
28
- # doc = REXML::Document.new(open("test.xml"))
29
- # doc.elements['cap:alert/cap:info'].children
30
- #
31
- config_param :time_xpath, :string, :default => nil
32
- config_param :time_key, :string, :default => nil
33
- config_param :time_format, :string, :default => nil # time_format is configurable
34
- config_param :attr_xpaths, :string, :default => '[]'
35
- config_param :value_xpaths, :string, :default => '[]'
36
- # This method is called after config_params have read configuration parameters
37
- def configure(conf)
38
- super
4
+ module Fluent::Plugin
5
+ class XmlParser < Parser
6
+ # Register this parser as "xml"
7
+ Fluent::Plugin.register_parser("xml", self)
39
8
 
40
- if conf['time_xpath'].nil?
41
- @time_xpath = nil
42
- else
43
- @time_xpath = json_parse(conf['time_xpath'])
44
- end
45
- @time_key = conf['time_key']
46
- @time_format = conf['time_format']
47
- @time_parser = TimeParser.new(@time_format)
48
- @attr_xpaths = json_parse(conf['attr_xpaths'])
49
- @value_xpaths = json_parse(conf['value_xpaths'])
50
- # TimeParser class is already given. It takes a single argument as the time format
51
- # to parse the time string with.
9
+ # How to specify the target attributes and values
10
+ # The followings are an example description for Libelium SmartCity sensor data.
11
+ #
12
+ # time_xpath '["cap:alert/cap:info/cap:onset", "text"]'
13
+ # attr_xpaths '[[null, "description"], ["cap:alert/cap:info/cap:parameter/cap:valueName", "text"]]'
14
+ # value_xpaths '[["cap:alert/cap:info/cap:description", "text"], ["cap:alert/cap:info/cap:parameter/cap:value", "text"]]'
15
+ #
16
+ # attr_xpaths indicates attribute name of the target value. Each array with two strings
17
+ # means xpath of the attribute name and the attribute of the XML element (name, text etc).
18
+ # XPath can be omitted as 'null' and specify your own attribute name as the second
19
+ # parameter.
20
+ #
21
+ # value_xpaths indicates the target value to be extracted. Each array with two strings
22
+ # means xpath of the target value and the attribute of the XML element (name, text etc).
23
+ # XPath can be omitted as 'null' and specify your own value as the second parameter.
24
+ #
25
+ # You can check your own XML data structure by using irb or pry
26
+ #
27
+ # require 'rexml/document'
28
+ # doc = REXML::Document.new(open("test.xml"))
29
+ # doc.elements['cap:alert/cap:info'].children
30
+ #
31
+ config_param :time_xpath, :string, :default => nil
32
+ config_param :time_key, :string, :default => nil
33
+ config_param :time_format, :string, :default => nil # time_format is configurable
34
+ config_param :attr_xpaths, :string, :default => '[]'
35
+ config_param :value_xpaths, :string, :default => '[]'
36
+ # This method is called after config_params have read configuration parameters
37
+ def configure(conf)
38
+ super
39
+
40
+ if conf['time_xpath'].nil?
41
+ @time_xpath = nil
42
+ else
43
+ @time_xpath = json_parse(@time_xpath)
52
44
  end
45
+ @time_parser = Fluent::TimeParser.new(@time_format)
46
+ @attr_xpaths = json_parse(@attr_xpaths)
47
+ @value_xpaths = json_parse(@value_xpaths)
48
+ # TimeParser class is already given. It takes a single argument as the time format
49
+ # to parse the time string with.
50
+ end
53
51
 
54
- # This is the main method. The input "text" is the unit of data to be parsed.
55
- # If this is the in_tail plugin, it would be a line. If this is for in_syslog,
56
- # it is a single syslog message.
57
- def parse(text)
58
- begin
59
- doc = REXML::Document.new(text)
60
- $log.debug doc
61
- # parse time field
62
- if @time_xpath.nil?
63
- @time = Fluent::Engine.now
64
- else
65
- @time = @time_parser.parse(doc.elements[@time_xpath[0]].method(@time_xpath[1]).call)
66
- end
67
- record = {}
68
- if !@time_key.nil?
69
- record = {@time_key => format_time(@time)}
70
- end
71
- attrs = @attr_xpaths.map do |attr_xpath|
72
- if attr_xpath[0].nil? # when null is specified
73
- attr_xpath[1] # second parameter is used as the attribute name
74
- else # otherwise, the target attribute name is extracted from XML
75
- el = doc.elements[attr_xpath[0]]
76
- unless el.nil? and attr_xpath.size > 2
77
- el.method(attr_xpath[1]).call
78
- else # unless it's not in the XML and we have a third parameter
79
- attr_xpath[2] # then the third parameter is used as the target value
80
- end
52
+ # This is the main method. The input "text" is the unit of data to be parsed.
53
+ # If this is the in_tail plugin, it would be a line. If this is for in_syslog,
54
+ # it is a single syslog message.
55
+ def parse(text)
56
+ begin
57
+ doc = REXML::Document.new(text)
58
+ $log.debug doc
59
+ # parse time field
60
+ if @time_xpath.nil?
61
+ time = Fluent::Engine.now
62
+ else
63
+ time = @time_parser.parse(doc.elements[@time_xpath[0]].method(@time_xpath[1]).call)
64
+ end
65
+ record = {}
66
+ if !@time_key.nil?
67
+ record = {@time_key => format_time(@time)}
68
+ end
69
+ attrs = @attr_xpaths.map do |attr_xpath|
70
+ if attr_xpath[0].nil? # when null is specified
71
+ attr_xpath[1] # second parameter is used as the attribute name
72
+ else # otherwise, the target attribute name is extracted from XML
73
+ el = doc.elements[attr_xpath[0]]
74
+ unless el.nil? and attr_xpath.size > 2
75
+ el.method(attr_xpath[1]).call
76
+ else # unless it's not in the XML and we have a third parameter
77
+ attr_xpath[2] # then the third parameter is used as the target value
81
78
  end
82
79
  end
83
- values = @value_xpaths.map do |value_xpath|
84
- if value_xpath[0].nil? # when null is specified
85
- value_xpath[1] # second parameter is used as the target value
86
- else # otherwise, the target value is extracted from XML
87
- el = doc.elements[value_xpath[0]]
88
- unless el.nil? and value_xpath.size > 2
89
- el.method(value_xpath[1]).call
90
- else # unless it's not in the XML and we have a third parameter
91
- value_xpath[2] # then the third parameter is used as the target value
92
- end
80
+ end
81
+ values = @value_xpaths.map do |value_xpath|
82
+ if value_xpath[0].nil? # when null is specified
83
+ value_xpath[1] # second parameter is used as the target value
84
+ else # otherwise, the target value is extracted from XML
85
+ el = doc.elements[value_xpath[0]]
86
+ unless el.nil? and value_xpath.size > 2
87
+ el.method(value_xpath[1]).call
88
+ else # unless it's not in the XML and we have a third parameter
89
+ value_xpath[2] # then the third parameter is used as the target value
93
90
  end
94
91
  end
95
- attrs.size.times do |i|
96
- record[attrs[i]] = values[i]
97
- end
98
- yield @time, record
99
- rescue REXML::ParseException => e
100
- $log.warn "Parse error", :error => e.to_s
101
- $log.debug_backtrace(e.backtrace)
102
- rescue Exception => e
103
- $log.warn "error", :error => e.to_s
104
- $log.debug_backtrace(e.backtrace)
105
92
  end
93
+ attrs.size.times do |i|
94
+ record[attrs[i]] = values[i]
95
+ end
96
+ yield time, record
97
+ rescue REXML::ParseException => e
98
+ $log.warn "Parse error", :error => e.to_s
99
+ $log.debug_backtrace(e.backtrace)
100
+ rescue Exception => e
101
+ $log.warn "error", :error => e.to_s
102
+ $log.debug_backtrace(e.backtrace)
106
103
  end
104
+ end
107
105
 
108
- def format_time(time)
109
- if @time_format.nil?
110
- Time.at(time).iso8601
111
- else
112
- Time.at(time).strftime(@time_format)
113
- end
106
+ def format_time(time)
107
+ if @time_format.nil?
108
+ Time.at(time).iso8601
109
+ else
110
+ Time.at(time).strftime(@time_format)
114
111
  end
112
+ end
115
113
 
116
- def json_parse message
117
- begin
118
- y = Yajl::Parser.new
119
- y.parse(message)
120
- rescue
121
- $log.error "JSON parse error", :error => $!.to_s, :error_class => $!.class.to_s
122
- $log.warn_backtrace $!.backtrace
123
- end
114
+ def json_parse message
115
+ begin
116
+ y = Yajl::Parser.new
117
+ y.parse(message)
118
+ rescue
119
+ $log.error "JSON parse error", :error => $!.to_s, :error_class => $!.class.to_s
120
+ $log.warn_backtrace $!.backtrace
124
121
  end
125
122
  end
126
123
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-xml-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Toyokazu Akiyama
@@ -15,14 +15,14 @@ dependencies:
15
15
  name: bundler
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - "~>"
18
+ - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '1.5'
21
21
  type: :development
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - "~>"
25
+ - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: '1.5'
28
28
  - !ruby/object:Gem::Dependency
@@ -43,6 +43,9 @@ dependencies:
43
43
  name: fluentd
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: 0.14.0
46
49
  - - "<"
47
50
  - !ruby/object:Gem::Version
48
51
  version: '2'
@@ -50,6 +53,29 @@ dependencies:
50
53
  prerelease: false
51
54
  version_requirements: !ruby/object:Gem::Requirement
52
55
  requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: 0.14.0
59
+ - - "<"
60
+ - !ruby/object:Gem::Version
61
+ version: '2'
62
+ - !ruby/object:Gem::Dependency
63
+ name: fluentd
64
+ requirement: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 0.14.0
69
+ - - "<"
70
+ - !ruby/object:Gem::Version
71
+ version: '2'
72
+ type: :runtime
73
+ prerelease: false
74
+ version_requirements: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: 0.14.0
53
79
  - - "<"
54
80
  - !ruby/object:Gem::Version
55
81
  version: '2'