logstash-filter-cleverxml 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 728014a70cda22cc1fdb803c25cbdb4498c7ee9d
4
+ data.tar.gz: b50591f1688faf6d1aa75ef06cca34759d86998b
5
+ SHA512:
6
+ metadata.gz: 68e0c28402a8bff7b126e784eb6e89fc2049c2c83f7a23e6c9878a761fa357c2473cab731a69c6a8cbfa3be48b111871397805016a3f0985250b2c2b1b3dfcab
7
+ data.tar.gz: 239ce573efd1f24fb5149cc79bb01109d8bf39617412adb968e9a3725dceb8f839f4e63f06849b99a67d75436e4ab5bb00dc6720514aee6f4be79e53f9af604b
data/CHANGELOG.md ADDED
@@ -0,0 +1,2 @@
1
+ ## 1.0.0
2
+ - First release.
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2012–2015 Elasticsearch <http://www.elastic.co>
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,21 @@
1
+ # Logstash Clever XML Plugin
2
+
3
+ This is a plugin for [Logstash](https://github.com/elastic/logstash).
4
+
5
+ It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
6
+
7
+ ## Documentation
8
+
9
+ Logstash provides infrastructure to automatically generate documentation for this plugin. We use the asciidoc format to write documentation, so any comments in the source code will first be converted into asciidoc and then into html. All plugin documentation is placed under one [central location](http://www.elastic.co/guide/en/logstash/current/).
10
+
11
+ - For formatting code or config examples, you can use the asciidoc `[source,ruby]` directive
12
+ - For more asciidoc formatting tips, see the excellent reference here https://github.com/elastic/docs#asciidoc-guide
13
+
14
+ ## Contributing
15
+
16
+ All contributions are welcome: ideas, patches, documentation, bug reports, complaints, and even something you drew up on a napkin.
17
+
18
+ Programming is not a required skill. Whatever you've seen about open source and maintainers or community members saying "send patches or die" - you will not see that here.
19
+
20
+ It is more important to the community that you are able to contribute.
21
+
@@ -0,0 +1,163 @@
1
+ # encoding: utf-8
2
+ require "logstash/filters/base"
3
+ require "logstash/namespace"
4
+
5
+ # XML filter. Takes a field that contains XML and expands it into
6
+ # an actual datastructure.
7
+ class LogStash::Filters::Cleverxml < LogStash::Filters::Base
8
+
9
+ config_name "cleverxml"
10
+
11
+ # Config for xml to hash is:
12
+ #
13
+ # source => source_field
14
+ #
15
+ # For example, if you have the whole xml document in your @message field:
16
+ #
17
+ # filter {
18
+ # cleverxml {
19
+ # source => "message"
20
+ # }
21
+ # }
22
+ #
23
+ # The above would parse the xml from the @message field
24
+ config :source, :validate => :string
25
+
26
+ # Define target for placing the data
27
+ #
28
+ # for example if you want the data to be put in the 'doc' field:
29
+ #
30
+ # filter {
31
+ # cleverxml {
32
+ # target => "doc"
33
+ # }
34
+ # }
35
+ #
36
+ # XML in the value of the source field will be expanded into a
37
+ # datastructure in the "target" field.
38
+ # Note: if the "target" field already exists, it will be overridden
39
+ # Optional.
40
+ # If target is not defined and store_xml is set to "true", the output
41
+ # will be written to the root of the event
42
+ config :target, :validate => :string
43
+
44
+ # xpath will additionally select string values (.to_s on whatever is selected)
45
+ # from parsed XML (using each source field defined using the method above)
46
+ # and place those values in the destination fields. Configuration:
47
+ #
48
+ # xpath => [ "xpath-syntax", "destination-field" ]
49
+ #
50
+ # Values returned by XPath parsing from xpath-syntax will be put in the
51
+ # destination field. Multiple values returned will be pushed onto the
52
+ # destination field as an array. As such, multiple matches across
53
+ # multiple source fields will produce duplicate entries in the field
54
+ #
55
+ # More on xpath: http://www.w3schools.com/xpath/
56
+ #
57
+ # The xpath functions are particularly powerful:
58
+ # http://www.w3schools.com/xpath/xpath_functions.asp
59
+ #
60
+ config :xpath, :validate => :hash, :default => {}
61
+
62
+ # By default the filter will store the whole parsed xml in the destination
63
+ # field as described above. Setting this to false will prevent that.
64
+ config :store_xml, :validate => :boolean, :default => true
65
+
66
+
67
+ # By default XmlSimple will return an array for each element, even if there is
68
+ # only one value, if you set this option to "false" it will return an array only
69
+ # if there is more than one value
70
+ # Note: if store_xml is set to "true" and target is not defined, the elements will
71
+ # be written as arrays to the root of the message; if you want the elements to be
72
+ # written to the root of the message as single fields, set force_array to "false"
73
+ config :force_array, :validate => :boolean, :default => false
74
+
75
+ public
76
+ def register
77
+ require "nokogiri"
78
+ require "xmlsimple"
79
+
80
+ end # def register
81
+
82
+ public
83
+ def filter(event)
84
+ return unless filter?(event)
85
+ matched = false
86
+
87
+ @logger.debug("Running xml filter", :event => event)
88
+
89
+ return unless event.include?(@source)
90
+
91
+ value = event[@source]
92
+
93
+ if value.is_a?(Array) && value.length > 1
94
+ @logger.warn("XML filter only works on fields of length 1",
95
+ :source => @source, :value => value)
96
+ return
97
+ end
98
+
99
+ # Do nothing with an empty string.
100
+ return if value.strip.length == 0
101
+
102
+ if @xpath
103
+ begin
104
+ doc = Nokogiri::XML(value)
105
+ rescue => e
106
+ event.tag("_xmlparsefailure")
107
+ @logger.warn("Trouble parsing xml", :source => @source, :value => value,
108
+ :exception => e, :backtrace => e.backtrace)
109
+ return
110
+ end
111
+
112
+ @xpath.each do |xpath_src, xpath_dest|
113
+ nodeset = doc.xpath(xpath_src)
114
+
115
+ # If asking xpath for a String, like "name(/*)", we get back a
116
+ # String instead of a NodeSet. We normalize that here.
117
+ normalized_nodeset = nodeset.kind_of?(Nokogiri::XML::NodeSet) ? nodeset : [nodeset]
118
+
119
+ normalized_nodeset.each do |value|
120
+ # some XPath functions return empty arrays as string
121
+ if value.is_a?(Array)
122
+ return if value.length == 0
123
+ end
124
+
125
+ unless value.nil?
126
+ matched = true
127
+ event[xpath_dest] ||= []
128
+ event[xpath_dest] << value.to_s
129
+ end
130
+ end # XPath.each
131
+ end # @xpath.each
132
+ end # if @xpath
133
+
134
+ if @target.nil?
135
+ # Default is to write to the root of the event.
136
+ dest = event.to_hash
137
+ else
138
+ dest = event[@target]
139
+ end
140
+
141
+ if @store_xml
142
+ begin
143
+ if dest.nil?
144
+ dest = XmlSimple.xml_in(value,{'ForceArray' => @force_array, 'SuppressEmpty' => true})
145
+ event[@target] = dest
146
+ else
147
+ dest.merge!(XmlSimple.xml_in(value,{'ForceArray' => @force_array, 'SuppressEmpty' => true}))
148
+ end
149
+
150
+ matched = true
151
+ rescue => e
152
+ event.tag("_xmlparsefailure")
153
+ @logger.warn("Trouble parsing xml with XmlSimple", :source => @source,
154
+ :value => value, :exception => e, :backtrace => e.backtrace)
155
+ return
156
+ end
157
+ end # if @store_xml
158
+
159
+ filter_matched(event) if matched
160
+ @logger.debug("Event after xml filter", :event => event)
161
+ end # def filter
162
+ end # class LogStash::Filters::Cleverxml
163
+
@@ -0,0 +1,29 @@
1
+ Gem::Specification.new do |s|
2
+
3
+ s.name = 'logstash-filter-cleverxml'
4
+ s.version = '1.0.0'
5
+ s.licenses = ['Apache License (2.0)']
6
+ s.summary = "Takes a field that contains XML and expands it into an actual datastructure."
7
+ s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
8
+ s.authors = ["Clever Age", "William Pottier"]
9
+ s.email = 'wpottier@clever-age.com'
10
+ s.homepage = "http://www.clever-age.com/"
11
+ s.require_paths = ["lib"]
12
+
13
+ # Files
14
+ s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','Gemfile','LICENSE']
15
+
16
+ # Tests
17
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
18
+
19
+ # Special flag to let us know this is actually a logstash plugin
20
+ s.metadata = { "logstash_plugin" => "true", "logstash_group" => "filter" }
21
+
22
+ # Gem dependencies
23
+ s.add_runtime_dependency "logstash-core", ">= 1.4.0", "< 2.0.0"
24
+ s.add_runtime_dependency 'nokogiri'
25
+ s.add_runtime_dependency 'xml-simple'
26
+
27
+ s.add_development_dependency 'logstash-devutils'
28
+ end
29
+
@@ -0,0 +1,209 @@
1
+ # encoding: utf-8
2
+ require "logstash/devutils/rspec/spec_helper"
3
+ require "logstash/filters/cleverxml"
4
+
5
+ describe LogStash::Filters::Cleverxml do
6
+
7
+ describe "parse standard xml (Deprecated checks)" do
8
+ config <<-CONFIG
9
+ filter {
10
+ cleverxml {
11
+ source => "raw"
12
+ target => "data"
13
+ }
14
+ }
15
+ CONFIG
16
+
17
+ sample("raw" => '<foo key="value"/>') do
18
+ insist { subject["tags"] }.nil?
19
+ insist { subject["data"]} == {"key" => "value"}
20
+ end
21
+
22
+ #From parse xml with array as a value
23
+ sample("raw" => '<foo><key>value1</key><key>value2</key></foo>') do
24
+ insist { subject["tags"] }.nil?
25
+ insist { subject["data"]} == {"key" => ["value1", "value2"]}
26
+ end
27
+
28
+ #From parse xml with hash as a value
29
+ sample("raw" => '<foo><key1><key2>value</key2></key1></foo>') do
30
+ insist { subject["tags"] }.nil?
31
+ insist { subject["data"]} == {"key1" => [{"key2" => ["value"]}]}
32
+ end
33
+
34
+ #From bad xml
35
+ sample("raw" => '<foo /') do
36
+ insist { subject["tags"] }.include?("_xmlparsefailure")
37
+ end
38
+ end
39
+
40
+ describe "parse standard xml but do not store (Deprecated checks)" do
41
+ config <<-CONFIG
42
+ filter {
43
+ cleverxml {
44
+ source => "raw"
45
+ target => "data"
46
+ store_xml => false
47
+ }
48
+ }
49
+ CONFIG
50
+
51
+ sample("raw" => '<foo key="value"/>') do
52
+ insist { subject["tags"] }.nil?
53
+ insist { subject["data"]} == nil
54
+ end
55
+ end
56
+
57
+ describe "parse xml and store values with xpath (Deprecated checks)" do
58
+ config <<-CONFIG
59
+ filter {
60
+ cleverxml {
61
+ source => "raw"
62
+ target => "data"
63
+ xpath => [ "/foo/key/text()", "xpath_field" ]
64
+ }
65
+ }
66
+ CONFIG
67
+
68
+ # Single value
69
+ sample("raw" => '<foo><key>value</key></foo>') do
70
+ insist { subject["tags"] }.nil?
71
+ insist { subject["xpath_field"]} == ["value"]
72
+ end
73
+
74
+ #Multiple values
75
+ sample("raw" => '<foo><key>value1</key><key>value2</key></foo>') do
76
+ insist { subject["tags"] }.nil?
77
+ insist { subject["xpath_field"]} == ["value1","value2"]
78
+ end
79
+ end
80
+
81
+ ## New tests
82
+
83
+ describe "parse standard xml" do
84
+ config <<-CONFIG
85
+ filter {
86
+ cleverxml {
87
+ source => "xmldata"
88
+ target => "data"
89
+ }
90
+ }
91
+ CONFIG
92
+
93
+ sample("xmldata" => '<foo key="value"/>') do
94
+ insist { subject["tags"] }.nil?
95
+ insist { subject["data"]} == {"key" => "value"}
96
+ end
97
+
98
+ #From parse xml with array as a value
99
+ sample("xmldata" => '<foo><key>value1</key><key>value2</key></foo>') do
100
+ insist { subject["tags"] }.nil?
101
+ insist { subject["data"]} == {"key" => ["value1", "value2"]}
102
+ end
103
+
104
+ #From parse xml with hash as a value
105
+ sample("xmldata" => '<foo><key1><key2>value</key2></key1></foo>') do
106
+ insist { subject["tags"] }.nil?
107
+ insist { subject["data"]} == {"key1" => [{"key2" => ["value"]}]}
108
+ end
109
+
110
+ #From bad xml
111
+ sample("xmldata" => '<foo /') do
112
+ insist { subject["tags"] }.include?("_xmlparsefailure")
113
+ end
114
+ end
115
+
116
+ describe "parse standard xml but do not store" do
117
+ config <<-CONFIG
118
+ filter {
119
+ cleverxml {
120
+ source => "xmldata"
121
+ target => "data"
122
+ store_xml => false
123
+ }
124
+ }
125
+ CONFIG
126
+
127
+ sample("xmldata" => '<foo key="value"/>') do
128
+ insist { subject["tags"] }.nil?
129
+ insist { subject["data"]} == nil
130
+ end
131
+ end
132
+
133
+ describe "parse xml and store values with xpath" do
134
+ config <<-CONFIG
135
+ filter {
136
+ cleverxml {
137
+ source => "xmldata"
138
+ target => "data"
139
+ xpath => [ "/foo/key/text()", "xpath_field" ]
140
+ }
141
+ }
142
+ CONFIG
143
+
144
+ # Single value
145
+ sample("xmldata" => '<foo><key>value</key></foo>') do
146
+ insist { subject["tags"] }.nil?
147
+ insist { subject["xpath_field"]} == ["value"]
148
+ end
149
+
150
+ #Multiple values
151
+ sample("xmldata" => '<foo><key>value1</key><key>value2</key></foo>') do
152
+ insist { subject["tags"] }.nil?
153
+ insist { subject["xpath_field"]} == ["value1","value2"]
154
+ end
155
+ end
156
+
157
+ describe "parse correctly non ascii content with xpath" do
158
+ config <<-CONFIG
159
+ filter {
160
+ cleverxml {
161
+ source => "xmldata"
162
+ target => "data"
163
+ xpath => [ "/foo/key/text()", "xpath_field" ]
164
+ }
165
+ }
166
+ CONFIG
167
+
168
+ # Single value
169
+ sample("xmldata" => '<foo><key>Français</key></foo>') do
170
+ insist { subject["tags"] }.nil?
171
+ insist { subject["xpath_field"]} == ["Français"]
172
+ end
173
+ end
174
+
175
+ describe "parse including namespaces" do
176
+ config <<-CONFIG
177
+ filter {
178
+ cleverxml {
179
+ source => "xmldata"
180
+ xpath => [ "/foo/h:div", "xpath_field" ]
181
+ remove_namespaces => false
182
+ }
183
+ }
184
+ CONFIG
185
+
186
+ # Single value
187
+ sample("xmldata" => '<foo xmlns:h="http://www.w3.org/TR/html4/"><h:div>Content</h:div></foo>') do
188
+ insist { subject["xpath_field"] } == ["<h:div>Content</h:div>"]
189
+ end
190
+ end
191
+
192
+ describe "parse removing namespaces" do
193
+ config <<-CONFIG
194
+ filter {
195
+ cleverxml {
196
+ source => "xmldata"
197
+ xpath => [ "/foo/div", "xpath_field" ]
198
+ remove_namespaces => true
199
+ }
200
+ }
201
+ CONFIG
202
+
203
+ # Single value
204
+ sample("xmldata" => '<foo xmlns:h="http://www.w3.org/TR/html4/"><h:div>Content</h:div></foo>') do
205
+ insist { subject["xpath_field"] } == ["<div>Content</div>"]
206
+ end
207
+ end
208
+
209
+ end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-filter-cleverxml
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Clever Age
8
+ - William Pottier
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-10-27 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ name: logstash-core
24
+ prerelease: false
25
+ type: :runtime
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ requirements:
28
+ - - '>='
29
+ - !ruby/object:Gem::Version
30
+ version: 1.4.0
31
+ - - <
32
+ - !ruby/object:Gem::Version
33
+ version: 2.0.0
34
+ - !ruby/object:Gem::Dependency
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ name: nokogiri
41
+ prerelease: false
42
+ type: :runtime
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ - !ruby/object:Gem::Dependency
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ name: xml-simple
55
+ prerelease: false
56
+ type: :runtime
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - '>='
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ name: logstash-devutils
69
+ prerelease: false
70
+ type: :development
71
+ version_requirements: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
77
+ email: wpottier@clever-age.com
78
+ executables: []
79
+ extensions: []
80
+ extra_rdoc_files: []
81
+ files:
82
+ - CHANGELOG.md
83
+ - Gemfile
84
+ - LICENSE
85
+ - README.md
86
+ - lib/logstash/filters/cleverxml.rb
87
+ - logstash-filter-cleverxml.gemspec
88
+ - spec/filters/cleverxml_spec.rb
89
+ homepage: http://www.clever-age.com/
90
+ licenses:
91
+ - Apache License (2.0)
92
+ metadata:
93
+ logstash_plugin: 'true'
94
+ logstash_group: filter
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.4.5
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Takes a field that contains XML and expands it into an actual datastructure.
115
+ test_files:
116
+ - spec/filters/cleverxml_spec.rb