logstash-filter-xml 2.0.2 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -2
- data/README.md +3 -0
- data/lib/logstash/filters/xml.rb +63 -32
- data/logstash-filter-xml.gemspec +1 -1
- data/spec/filters/xml_spec.rb +66 -0
- metadata +17 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0af48f46dcce1e66c87380592f9b490d0c377aae
|
4
|
+
data.tar.gz: 41ebeb840603ae50e82be8ef300d1de47c681a2f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 64f80c71aa98dd82c1b846799b23aa5402518c7886059c0d01dde2789fe982076bcebce470dbeb2d7fd869e14dfa530a7185114b5a7532de4715614ef64e50ea
|
7
|
+
data.tar.gz: 03005e58d2645fbfdab2826a962a6ba7fbd9a4f2a8f48bebe822ae4a5faf3c06c57c67e8135a77178ca36fff44f395ff5a0932c013d3cc79fa425b12ab89a716
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,10 @@
|
|
1
|
+
## 2.1.1
|
2
|
+
- Refactored field references, code cleanups
|
3
|
+
|
4
|
+
## 2.1.0
|
5
|
+
- Support for namespace declarations to use when parsing the XML document
|
6
|
+
|
1
7
|
## 2.0.0
|
2
|
-
- Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully,
|
8
|
+
- Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully,
|
3
9
|
instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895
|
4
10
|
- Dependency on logstash-core update to 2.0
|
5
|
-
|
data/README.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# Logstash Plugin
|
2
2
|
|
3
|
+
[![Build
|
4
|
+
Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Filters/job/logstash-plugin-filter-xml-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Filters/job/logstash-plugin-filter-xml-unit/)
|
5
|
+
|
3
6
|
This is a plugin for [Logstash](https://github.com/elastic/logstash).
|
4
7
|
|
5
8
|
It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
|
data/lib/logstash/filters/xml.rb
CHANGED
@@ -62,49 +62,73 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
|
|
62
62
|
# field as described above. Setting this to false will prevent that.
|
63
63
|
config :store_xml, :validate => :boolean, :default => true
|
64
64
|
|
65
|
+
# By default only namespace declarations on the root element are considered.
|
66
|
+
# This option lets you configure all namespace declarations used to parse the XML document.
|
67
|
+
#
|
68
|
+
# Example:
|
69
|
+
#
|
70
|
+
# [source,ruby]
|
71
|
+
# filter {
|
72
|
+
# xml {
|
73
|
+
# namespaces => {
|
74
|
+
# "xsl" => "http://www.w3.org/1999/XSL/Transform"
|
75
|
+
# "xhtml" => "http://www.w3.org/1999/xhtml"
|
76
|
+
# }
|
77
|
+
# }
|
78
|
+
# }
|
79
|
+
#
|
80
|
+
config :namespaces, :validate => :hash, :default => {}
|
81
|
+
|
65
82
|
# Remove all namespaces from all nodes in the document.
|
66
83
|
# Of course, if the document had nodes with the same names but different namespaces, they will now be ambiguous.
|
67
84
|
config :remove_namespaces, :validate => :boolean, :default => false
|
68
85
|
|
69
|
-
|
86
|
+
XMLPARSEFAILURE_TAG = "_xmlparsefailure"
|
87
|
+
|
70
88
|
def register
|
71
89
|
require "nokogiri"
|
72
90
|
require "xmlsimple"
|
91
|
+
end
|
73
92
|
|
74
|
-
end # def register
|
75
|
-
|
76
|
-
public
|
77
93
|
def filter(event)
|
78
|
-
|
79
94
|
matched = false
|
80
95
|
|
81
|
-
@logger.debug("Running xml filter", :event => event)
|
82
|
-
|
83
|
-
return unless event.include?(@source)
|
96
|
+
@logger.debug? && @logger.debug("Running xml filter", :event => event)
|
84
97
|
|
85
98
|
value = event[@source]
|
99
|
+
return unless value
|
86
100
|
|
87
|
-
if value.is_a?(Array)
|
88
|
-
|
89
|
-
|
101
|
+
if value.is_a?(Array)
|
102
|
+
if value.length != 1
|
103
|
+
event.tag(XMLPARSEFAILURE_TAG)
|
104
|
+
@logger.warn("XML filter expects single item array", :source => @source, :value => value)
|
105
|
+
return
|
106
|
+
end
|
107
|
+
|
108
|
+
value = value.first
|
109
|
+
end
|
110
|
+
|
111
|
+
unless value.is_a?(String)
|
112
|
+
event.tag(XMLPARSEFAILURE_TAG)
|
113
|
+
@logger.warn("XML filter expects a string but received a #{value.class}", :source => @source, :value => value)
|
90
114
|
return
|
91
115
|
end
|
92
116
|
|
93
117
|
# Do nothing with an empty string.
|
94
|
-
return if value.strip.
|
118
|
+
return if value.strip.empty?
|
95
119
|
|
96
120
|
if @xpath
|
97
121
|
begin
|
98
122
|
doc = Nokogiri::XML(value, nil, value.encoding.to_s)
|
99
123
|
rescue => e
|
100
|
-
event.tag(
|
101
|
-
@logger.warn("
|
102
|
-
:exception => e, :backtrace => e.backtrace)
|
124
|
+
event.tag(XMLPARSEFAILURE_TAG)
|
125
|
+
@logger.warn("Error parsing xml", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
|
103
126
|
return
|
104
127
|
end
|
105
128
|
doc.remove_namespaces! if @remove_namespaces
|
129
|
+
|
106
130
|
@xpath.each do |xpath_src, xpath_dest|
|
107
|
-
nodeset = doc.xpath(xpath_src)
|
131
|
+
nodeset = @namespaces.empty? ? doc.xpath(xpath_src) : doc.xpath(xpath_src, @namespaces)
|
108
132
|
|
109
133
|
# If asking xpath for a String, like "name(/*)", we get back a
|
110
134
|
# String instead of a NodeSet. We normalize that here.
|
@@ -112,32 +136,39 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
|
|
112
136
|
|
113
137
|
normalized_nodeset.each do |value|
|
114
138
|
# some XPath functions return empty arrays as string
|
115
|
-
|
116
|
-
|
117
|
-
end
|
139
|
+
# TODO: (colin) the return statement here feels like a bug and should probably be a next ?
|
140
|
+
return if value.is_a?(Array) && value.length == 0
|
118
141
|
|
119
|
-
|
142
|
+
if value
|
120
143
|
matched = true
|
121
|
-
|
122
|
-
|
144
|
+
# TODO: (colin) this can probably be optimized to avoid the Event get/set at every loop iteration and
|
145
|
+
# the array should probably be created once, filled in the loop and set after the loop but the return
|
146
|
+
# statement above screws this strategy and is likely a bug anyway so I will not touch this until I can
|
147
|
+
# dig a bit deeper and verify there is a sufficient test harness to refactor this.
|
148
|
+
data = event[xpath_dest] || []
|
149
|
+
data << value.to_s
|
150
|
+
event[xpath_dest] = data
|
151
|
+
|
152
|
+
# do not use the following construct to set the event, we cannot assume anymore that the field values are in-place mutable
|
153
|
+
# event[xpath_dest] ||= []
|
154
|
+
# event[xpath_dest] << value.to_s
|
123
155
|
end
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
127
159
|
|
128
160
|
if @store_xml
|
129
161
|
begin
|
130
162
|
event[@target] = XmlSimple.xml_in(value)
|
131
163
|
matched = true
|
132
164
|
rescue => e
|
133
|
-
event.tag(
|
134
|
-
@logger.warn("
|
135
|
-
:value => value, :exception => e, :backtrace => e.backtrace)
|
165
|
+
event.tag(XMLPARSEFAILURE_TAG)
|
166
|
+
@logger.warn("Error parsing xml with XmlSimple", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
|
136
167
|
return
|
137
168
|
end
|
138
|
-
end
|
169
|
+
end
|
139
170
|
|
140
171
|
filter_matched(event) if matched
|
141
|
-
@logger.debug("Event after xml filter", :event => event)
|
142
|
-
end
|
143
|
-
end
|
172
|
+
@logger.debug? && @logger.debug("Event after xml filter", :event => event)
|
173
|
+
end
|
174
|
+
end
|
data/logstash-filter-xml.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-xml'
|
4
|
-
s.version = '2.
|
4
|
+
s.version = '2.1.1'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "Takes a field that contains XML and expands it into an actual datastructure."
|
7
7
|
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
|
data/spec/filters/xml_spec.rb
CHANGED
@@ -31,6 +31,36 @@ describe LogStash::Filters::Xml do
|
|
31
31
|
insist { subject["data"]} == {"key1" => [{"key2" => ["value"]}]}
|
32
32
|
end
|
33
33
|
|
34
|
+
# parse xml in single item array
|
35
|
+
sample("raw" => ["<foo bar=\"baz\"/>"]) do
|
36
|
+
insist { subject["tags"] }.nil?
|
37
|
+
insist { subject["data"]} == {"bar" => "baz"}
|
38
|
+
end
|
39
|
+
|
40
|
+
# fail in multi items array
|
41
|
+
sample("raw" => ["<foo bar=\"baz\"/>", "jojoba"]) do
|
42
|
+
insist { subject["tags"] }.include?("_xmlparsefailure")
|
43
|
+
insist { subject["data"]} == nil
|
44
|
+
end
|
45
|
+
|
46
|
+
# fail in empty array
|
47
|
+
sample("raw" => []) do
|
48
|
+
insist { subject["tags"] }.include?("_xmlparsefailure")
|
49
|
+
insist { subject["data"]} == nil
|
50
|
+
end
|
51
|
+
|
52
|
+
# fail for non string field
|
53
|
+
sample("raw" => {"foo" => "bar"}) do
|
54
|
+
insist { subject["tags"] }.include?("_xmlparsefailure")
|
55
|
+
insist { subject["data"]} == nil
|
56
|
+
end
|
57
|
+
|
58
|
+
# fail for non string single item array
|
59
|
+
sample("raw" => [{"foo" => "bar"}]) do
|
60
|
+
insist { subject["tags"] }.include?("_xmlparsefailure")
|
61
|
+
insist { subject["data"]} == nil
|
62
|
+
end
|
63
|
+
|
34
64
|
#From bad xml
|
35
65
|
sample("raw" => '<foo /') do
|
36
66
|
insist { subject["tags"] }.include?("_xmlparsefailure")
|
@@ -189,6 +219,42 @@ describe LogStash::Filters::Xml do
|
|
189
219
|
end
|
190
220
|
end
|
191
221
|
|
222
|
+
describe "parse including namespaces declarations on root" do
|
223
|
+
config <<-CONFIG
|
224
|
+
filter {
|
225
|
+
xml {
|
226
|
+
source => "xmldata"
|
227
|
+
xpath => [ "/foo/h:div", "xpath_field" ]
|
228
|
+
namespaces => {"h" => "http://www.w3.org/TR/html4/"}
|
229
|
+
remove_namespaces => false
|
230
|
+
}
|
231
|
+
}
|
232
|
+
CONFIG
|
233
|
+
|
234
|
+
# Single value
|
235
|
+
sample("xmldata" => '<foo xmlns:h="http://www.w3.org/TR/html4/"><h:div>Content</h:div></foo>') do
|
236
|
+
insist { subject["xpath_field"] } == ["<h:div>Content</h:div>"]
|
237
|
+
end
|
238
|
+
end
|
239
|
+
|
240
|
+
describe "parse including namespaces declarations on child" do
|
241
|
+
config <<-CONFIG
|
242
|
+
filter {
|
243
|
+
xml {
|
244
|
+
source => "xmldata"
|
245
|
+
xpath => [ "/foo/h:div", "xpath_field" ]
|
246
|
+
namespaces => {"h" => "http://www.w3.org/TR/html4/"}
|
247
|
+
remove_namespaces => false
|
248
|
+
}
|
249
|
+
}
|
250
|
+
CONFIG
|
251
|
+
|
252
|
+
# Single value
|
253
|
+
sample("xmldata" => '<foo><h:div xmlns:h="http://www.w3.org/TR/html4/">Content</h:div></foo>') do
|
254
|
+
insist { subject["xpath_field"] } == ["<h:div xmlns:h=\"http://www.w3.org/TR/html4/\">Content</h:div>"]
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
192
258
|
describe "parse removing namespaces" do
|
193
259
|
config <<-CONFIG
|
194
260
|
filter {
|
metadata
CHANGED
@@ -1,17 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-xml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
|
14
|
+
name: logstash-core
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
15
16
|
requirements:
|
16
17
|
- - '>='
|
17
18
|
- !ruby/object:Gem::Version
|
@@ -19,10 +20,7 @@ dependencies:
|
|
19
20
|
- - <
|
20
21
|
- !ruby/object:Gem::Version
|
21
22
|
version: 3.0.0
|
22
|
-
|
23
|
-
prerelease: false
|
24
|
-
type: :runtime
|
25
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirement: !ruby/object:Gem::Requirement
|
26
24
|
requirements:
|
27
25
|
- - '>='
|
28
26
|
- !ruby/object:Gem::Version
|
@@ -30,48 +28,50 @@ dependencies:
|
|
30
28
|
- - <
|
31
29
|
- !ruby/object:Gem::Version
|
32
30
|
version: 3.0.0
|
31
|
+
prerelease: false
|
32
|
+
type: :runtime
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
|
+
name: nokogiri
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
34
40
|
requirement: !ruby/object:Gem::Requirement
|
35
41
|
requirements:
|
36
42
|
- - '>='
|
37
43
|
- !ruby/object:Gem::Version
|
38
44
|
version: '0'
|
39
|
-
name: nokogiri
|
40
45
|
prerelease: false
|
41
46
|
type: :runtime
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: xml-simple
|
42
49
|
version_requirements: !ruby/object:Gem::Requirement
|
43
50
|
requirements:
|
44
51
|
- - '>='
|
45
52
|
- !ruby/object:Gem::Version
|
46
53
|
version: '0'
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
54
|
requirement: !ruby/object:Gem::Requirement
|
49
55
|
requirements:
|
50
56
|
- - '>='
|
51
57
|
- !ruby/object:Gem::Version
|
52
58
|
version: '0'
|
53
|
-
name: xml-simple
|
54
59
|
prerelease: false
|
55
60
|
type: :runtime
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: logstash-devutils
|
56
63
|
version_requirements: !ruby/object:Gem::Requirement
|
57
64
|
requirements:
|
58
65
|
- - '>='
|
59
66
|
- !ruby/object:Gem::Version
|
60
67
|
version: '0'
|
61
|
-
- !ruby/object:Gem::Dependency
|
62
68
|
requirement: !ruby/object:Gem::Requirement
|
63
69
|
requirements:
|
64
70
|
- - '>='
|
65
71
|
- !ruby/object:Gem::Version
|
66
72
|
version: '0'
|
67
|
-
name: logstash-devutils
|
68
73
|
prerelease: false
|
69
74
|
type: :development
|
70
|
-
version_requirements: !ruby/object:Gem::Requirement
|
71
|
-
requirements:
|
72
|
-
- - '>='
|
73
|
-
- !ruby/object:Gem::Version
|
74
|
-
version: '0'
|
75
75
|
description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
|
76
76
|
email: info@elastic.co
|
77
77
|
executables: []
|