logstash-filter-xml 2.0.2 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -2
- data/README.md +3 -0
- data/lib/logstash/filters/xml.rb +63 -32
- data/logstash-filter-xml.gemspec +1 -1
- data/spec/filters/xml_spec.rb +66 -0
- metadata +17 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0af48f46dcce1e66c87380592f9b490d0c377aae
|
4
|
+
data.tar.gz: 41ebeb840603ae50e82be8ef300d1de47c681a2f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 64f80c71aa98dd82c1b846799b23aa5402518c7886059c0d01dde2789fe982076bcebce470dbeb2d7fd869e14dfa530a7185114b5a7532de4715614ef64e50ea
|
7
|
+
data.tar.gz: 03005e58d2645fbfdab2826a962a6ba7fbd9a4f2a8f48bebe822ae4a5faf3c06c57c67e8135a77178ca36fff44f395ff5a0932c013d3cc79fa425b12ab89a716
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,10 @@
|
|
1
|
+
## 2.1.1
|
2
|
+
- Refactored field references, code cleanups
|
3
|
+
|
4
|
+
## 2.1.0
|
5
|
+
- Support for namespace declarations to use when parsing the XML document
|
6
|
+
|
1
7
|
## 2.0.0
|
2
|
-
- Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully,
|
8
|
+
- Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully,
|
3
9
|
instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895
|
4
10
|
- Dependency on logstash-core update to 2.0
|
5
|
-
|
data/README.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# Logstash Plugin
|
2
2
|
|
3
|
+
[Build Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Filters/job/logstash-plugin-filter-xml-unit/)
|
5
|
+
|
3
6
|
This is a plugin for [Logstash](https://github.com/elastic/logstash).
|
4
7
|
|
5
8
|
It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
|
data/lib/logstash/filters/xml.rb
CHANGED
@@ -62,49 +62,73 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
|
|
62
62
|
# field as described above. Setting this to false will prevent that.
|
63
63
|
config :store_xml, :validate => :boolean, :default => true
|
64
64
|
|
65
|
+
# By default only namespace declarations on the root element are considered.
|
66
|
+
# This option allows configuring additional namespace declarations to use when parsing the XML document.
|
67
|
+
#
|
68
|
+
# Example:
|
69
|
+
#
|
70
|
+
# [source,ruby]
|
71
|
+
# filter {
|
72
|
+
# xml {
|
73
|
+
# namespaces => {
|
74
|
+
# "xsl" => "http://www.w3.org/1999/XSL/Transform"
|
75
|
+
# "xhtml" => "http://www.w3.org/1999/xhtml"
|
76
|
+
# }
|
77
|
+
# }
|
78
|
+
# }
|
79
|
+
#
|
80
|
+
config :namespaces, :validate => :hash, :default => {}
|
81
|
+
|
65
82
|
# Remove all namespaces from all nodes in the document.
|
66
83
|
# Of course, if the document had nodes with the same names but different namespaces, they will now be ambiguous.
|
67
84
|
config :remove_namespaces, :validate => :boolean, :default => false
|
68
85
|
|
69
|
-
|
86
|
+
XMLPARSEFAILURE_TAG = "_xmlparsefailure"
|
87
|
+
|
70
88
|
def register
|
71
89
|
require "nokogiri"
|
72
90
|
require "xmlsimple"
|
91
|
+
end
|
73
92
|
|
74
|
-
end # def register
|
75
|
-
|
76
|
-
public
|
77
93
|
def filter(event)
|
78
|
-
|
79
94
|
matched = false
|
80
95
|
|
81
|
-
@logger.debug("Running xml filter", :event => event)
|
82
|
-
|
83
|
-
return unless event.include?(@source)
|
96
|
+
@logger.debug? && @logger.debug("Running xml filter", :event => event)
|
84
97
|
|
85
98
|
value = event[@source]
|
99
|
+
return unless value
|
86
100
|
|
87
|
-
if value.is_a?(Array)
|
88
|
-
|
89
|
-
|
101
|
+
if value.is_a?(Array)
|
102
|
+
if value.length != 1
|
103
|
+
event.tag(XMLPARSEFAILURE_TAG)
|
104
|
+
@logger.warn("XML filter expects single item array", :source => @source, :value => value)
|
105
|
+
return
|
106
|
+
end
|
107
|
+
|
108
|
+
value = value.first
|
109
|
+
end
|
110
|
+
|
111
|
+
unless value.is_a?(String)
|
112
|
+
event.tag(XMLPARSEFAILURE_TAG)
|
113
|
+
@logger.warn("XML filter expects a string but received a #{value.class}", :source => @source, :value => value)
|
90
114
|
return
|
91
115
|
end
|
92
116
|
|
93
117
|
# Do nothing with an empty string.
|
94
|
-
return if value.strip.
|
118
|
+
return if value.strip.empty?
|
95
119
|
|
96
120
|
if @xpath
|
97
121
|
begin
|
98
122
|
doc = Nokogiri::XML(value, nil, value.encoding.to_s)
|
99
123
|
rescue => e
|
100
|
-
event.tag(
|
101
|
-
@logger.warn("
|
102
|
-
:exception => e, :backtrace => e.backtrace)
|
124
|
+
event.tag(XMLPARSEFAILURE_TAG)
|
125
|
+
@logger.warn("Error parsing xml", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
|
103
126
|
return
|
104
127
|
end
|
105
128
|
doc.remove_namespaces! if @remove_namespaces
|
129
|
+
|
106
130
|
@xpath.each do |xpath_src, xpath_dest|
|
107
|
-
nodeset = doc.xpath(xpath_src)
|
131
|
+
nodeset = @namespaces.empty? ? doc.xpath(xpath_src) : doc.xpath(xpath_src, @namespaces)
|
108
132
|
|
109
133
|
# If asking xpath for a String, like "name(/*)", we get back a
|
110
134
|
# String instead of a NodeSet. We normalize that here.
|
@@ -112,32 +136,39 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
|
|
112
136
|
|
113
137
|
normalized_nodeset.each do |value|
|
114
138
|
# some XPath functions return empty arrays as string
|
115
|
-
|
116
|
-
|
117
|
-
end
|
139
|
+
# TODO: (colin) the return statement here feels like a bug and should probably be a next ?
|
140
|
+
return if value.is_a?(Array) && value.length == 0
|
118
141
|
|
119
|
-
|
142
|
+
if value
|
120
143
|
matched = true
|
121
|
-
|
122
|
-
|
144
|
+
# TODO: (colin) this can probably be optimized to avoid the Event get/set at every loop iteration and
|
145
|
+
# the array should probably be created once, filled in the loop and set after the loop but the return
|
146
|
+
# statement above screws this strategy and is likely a bug anyway so I will not touch this until I can
|
147
|
+
# dig a bit deeper and verify there is a sufficient test harness to refactor this.
|
148
|
+
data = event[xpath_dest] || []
|
149
|
+
data << value.to_s
|
150
|
+
event[xpath_dest] = data
|
151
|
+
|
152
|
+
# do not use the following construct to set the event, we cannot assume anymore that the field values are in-place mutable
|
153
|
+
# event[xpath_dest] ||= []
|
154
|
+
# event[xpath_dest] << value.to_s
|
123
155
|
end
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
127
159
|
|
128
160
|
if @store_xml
|
129
161
|
begin
|
130
162
|
event[@target] = XmlSimple.xml_in(value)
|
131
163
|
matched = true
|
132
164
|
rescue => e
|
133
|
-
event.tag(
|
134
|
-
@logger.warn("
|
135
|
-
:value => value, :exception => e, :backtrace => e.backtrace)
|
165
|
+
event.tag(XMLPARSEFAILURE_TAG)
|
166
|
+
@logger.warn("Error parsing xml with XmlSimple", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
|
136
167
|
return
|
137
168
|
end
|
138
|
-
end
|
169
|
+
end
|
139
170
|
|
140
171
|
filter_matched(event) if matched
|
141
|
-
@logger.debug("Event after xml filter", :event => event)
|
142
|
-
end
|
143
|
-
end
|
172
|
+
@logger.debug? && @logger.debug("Event after xml filter", :event => event)
|
173
|
+
end
|
174
|
+
end
|
data/logstash-filter-xml.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-xml'
|
4
|
-
s.version = '2.
|
4
|
+
s.version = '2.1.1'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "Takes a field that contains XML and expands it into an actual datastructure."
|
7
7
|
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
|
data/spec/filters/xml_spec.rb
CHANGED
@@ -31,6 +31,36 @@ describe LogStash::Filters::Xml do
|
|
31
31
|
insist { subject["data"]} == {"key1" => [{"key2" => ["value"]}]}
|
32
32
|
end
|
33
33
|
|
34
|
+
# parse xml in single item array
|
35
|
+
sample("raw" => ["<foo bar=\"baz\"/>"]) do
|
36
|
+
insist { subject["tags"] }.nil?
|
37
|
+
insist { subject["data"]} == {"bar" => "baz"}
|
38
|
+
end
|
39
|
+
|
40
|
+
# fail in multi items array
|
41
|
+
sample("raw" => ["<foo bar=\"baz\"/>", "jojoba"]) do
|
42
|
+
insist { subject["tags"] }.include?("_xmlparsefailure")
|
43
|
+
insist { subject["data"]} == nil
|
44
|
+
end
|
45
|
+
|
46
|
+
# fail in empty array
|
47
|
+
sample("raw" => []) do
|
48
|
+
insist { subject["tags"] }.include?("_xmlparsefailure")
|
49
|
+
insist { subject["data"]} == nil
|
50
|
+
end
|
51
|
+
|
52
|
+
# fail for non string field
|
53
|
+
sample("raw" => {"foo" => "bar"}) do
|
54
|
+
insist { subject["tags"] }.include?("_xmlparsefailure")
|
55
|
+
insist { subject["data"]} == nil
|
56
|
+
end
|
57
|
+
|
58
|
+
# fail for non string single item array
|
59
|
+
sample("raw" => [{"foo" => "bar"}]) do
|
60
|
+
insist { subject["tags"] }.include?("_xmlparsefailure")
|
61
|
+
insist { subject["data"]} == nil
|
62
|
+
end
|
63
|
+
|
34
64
|
#From bad xml
|
35
65
|
sample("raw" => '<foo /') do
|
36
66
|
insist { subject["tags"] }.include?("_xmlparsefailure")
|
@@ -189,6 +219,42 @@ describe LogStash::Filters::Xml do
|
|
189
219
|
end
|
190
220
|
end
|
191
221
|
|
222
|
+
describe "parse including namespaces declarations on root" do
|
223
|
+
config <<-CONFIG
|
224
|
+
filter {
|
225
|
+
xml {
|
226
|
+
source => "xmldata"
|
227
|
+
xpath => [ "/foo/h:div", "xpath_field" ]
|
228
|
+
namespaces => {"h" => "http://www.w3.org/TR/html4/"}
|
229
|
+
remove_namespaces => false
|
230
|
+
}
|
231
|
+
}
|
232
|
+
CONFIG
|
233
|
+
|
234
|
+
# Single value
|
235
|
+
sample("xmldata" => '<foo xmlns:h="http://www.w3.org/TR/html4/"><h:div>Content</h:div></foo>') do
|
236
|
+
insist { subject["xpath_field"] } == ["<h:div>Content</h:div>"]
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
describe "parse including namespaces declarations on child" do
|
241
|
+
config <<-CONFIG
|
242
|
+
filter {
|
243
|
+
xml {
|
244
|
+
source => "xmldata"
|
245
|
+
xpath => [ "/foo/h:div", "xpath_field" ]
|
246
|
+
namespaces => {"h" => "http://www.w3.org/TR/html4/"}
|
247
|
+
remove_namespaces => false
|
248
|
+
}
|
249
|
+
}
|
250
|
+
CONFIG
|
251
|
+
|
252
|
+
# Single value
|
253
|
+
sample("xmldata" => '<foo><h:div xmlns:h="http://www.w3.org/TR/html4/">Content</h:div></foo>') do
|
254
|
+
insist { subject["xpath_field"] } == ["<h:div xmlns:h=\"http://www.w3.org/TR/html4/\">Content</h:div>"]
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
192
258
|
describe "parse removing namespaces" do
|
193
259
|
config <<-CONFIG
|
194
260
|
filter {
|
metadata
CHANGED
@@ -1,17 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-xml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
|
14
|
+
name: logstash-core
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
17
|
- - '>='
|
17
18
|
- !ruby/object:Gem::Version
|
@@ -19,10 +20,7 @@ dependencies:
|
|
19
20
|
- - <
|
20
21
|
- !ruby/object:Gem::Version
|
21
22
|
version: 3.0.0
|
22
|
-
|
23
|
-
prerelease: false
|
24
|
-
type: :runtime
|
25
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirement: !ruby/object:Gem::Requirement
|
26
24
|
requirements:
|
27
25
|
- - '>='
|
28
26
|
- !ruby/object:Gem::Version
|
@@ -30,48 +28,50 @@ dependencies:
|
|
30
28
|
- - <
|
31
29
|
- !ruby/object:Gem::Version
|
32
30
|
version: 3.0.0
|
31
|
+
prerelease: false
|
32
|
+
type: :runtime
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
|
+
name: nokogiri
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
34
40
|
requirement: !ruby/object:Gem::Requirement
|
35
41
|
requirements:
|
36
42
|
- - '>='
|
37
43
|
- !ruby/object:Gem::Version
|
38
44
|
version: '0'
|
39
|
-
name: nokogiri
|
40
45
|
prerelease: false
|
41
46
|
type: :runtime
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: xml-simple
|
42
49
|
version_requirements: !ruby/object:Gem::Requirement
|
43
50
|
requirements:
|
44
51
|
- - '>='
|
45
52
|
- !ruby/object:Gem::Version
|
46
53
|
version: '0'
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
54
|
requirement: !ruby/object:Gem::Requirement
|
49
55
|
requirements:
|
50
56
|
- - '>='
|
51
57
|
- !ruby/object:Gem::Version
|
52
58
|
version: '0'
|
53
|
-
name: xml-simple
|
54
59
|
prerelease: false
|
55
60
|
type: :runtime
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: logstash-devutils
|
56
63
|
version_requirements: !ruby/object:Gem::Requirement
|
57
64
|
requirements:
|
58
65
|
- - '>='
|
59
66
|
- !ruby/object:Gem::Version
|
60
67
|
version: '0'
|
61
|
-
- !ruby/object:Gem::Dependency
|
62
68
|
requirement: !ruby/object:Gem::Requirement
|
63
69
|
requirements:
|
64
70
|
- - '>='
|
65
71
|
- !ruby/object:Gem::Version
|
66
72
|
version: '0'
|
67
|
-
name: logstash-devutils
|
68
73
|
prerelease: false
|
69
74
|
type: :development
|
70
|
-
version_requirements: !ruby/object:Gem::Requirement
|
71
|
-
requirements:
|
72
|
-
- - '>='
|
73
|
-
- !ruby/object:Gem::Version
|
74
|
-
version: '0'
|
75
75
|
description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
|
76
76
|
email: info@elastic.co
|
77
77
|
executables: []
|