logstash-filter-xml 2.0.2 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5eeac68d6edb559c401be45588545472d0f7251c
4
- data.tar.gz: 31b715339f8138873c3d9d40680fb163c8747589
3
+ metadata.gz: 0af48f46dcce1e66c87380592f9b490d0c377aae
4
+ data.tar.gz: 41ebeb840603ae50e82be8ef300d1de47c681a2f
5
5
  SHA512:
6
- metadata.gz: 76065a921e5de7e614e934849a8e1264b7086c2a64a5d2ff75c233c050b576e5def73ef0b860c25f4731effb83f9ad02e89319f9f1307e60be1a8c88d214a8ae
7
- data.tar.gz: 371c85df1aa6ae51f00f172b6056a58e190d7a9b93fcc461c808193faf762578bcc3d1791996267a374145c5a1b9de591b3240378e3460fec6ac6bfaf2ac18bd
6
+ metadata.gz: 64f80c71aa98dd82c1b846799b23aa5402518c7886059c0d01dde2789fe982076bcebce470dbeb2d7fd869e14dfa530a7185114b5a7532de4715614ef64e50ea
7
+ data.tar.gz: 03005e58d2645fbfdab2826a962a6ba7fbd9a4f2a8f48bebe822ae4a5faf3c06c57c67e8135a77178ca36fff44f395ff5a0932c013d3cc79fa425b12ab89a716
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
+ ## 2.1.1
2
+ - Refactored field references, code cleanups
3
+
4
+ ## 2.1.0
5
+ - Support for namespace declarations to use when parsing the XML document
6
+
1
7
  ## 2.0.0
2
- - Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully,
8
+ - Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully,
3
9
  instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895
4
10
  - Dependency on logstash-core update to 2.0
5
-
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Logstash Plugin
2
2
 
3
+ [![Build
4
+ Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Filters/job/logstash-plugin-filter-xml-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Filters/job/logstash-plugin-filter-xml-unit/)
5
+
3
6
  This is a plugin for [Logstash](https://github.com/elastic/logstash).
4
7
 
5
8
  It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
@@ -62,49 +62,73 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
62
62
  # field as described above. Setting this to false will prevent that.
63
63
  config :store_xml, :validate => :boolean, :default => true
64
64
 
65
+ # By default only namespace declarations on the root element are considered.
66
+ # This option lets you configure all namespace declarations used to parse the XML document.
67
+ #
68
+ # Example:
69
+ #
70
+ # [source,ruby]
71
+ # filter {
72
+ # xml {
73
+ # namespaces => {
74
+ # "xsl" => "http://www.w3.org/1999/XSL/Transform"
75
+ # "xhtml" => "http://www.w3.org/1999/xhtml"
76
+ # }
77
+ # }
78
+ # }
79
+ #
80
+ config :namespaces, :validate => :hash, :default => {}
81
+
65
82
  # Remove all namespaces from all nodes in the document.
66
83
  # Of course, if the document had nodes with the same names but different namespaces, they will now be ambiguous.
67
84
  config :remove_namespaces, :validate => :boolean, :default => false
68
85
 
69
- public
86
+ XMLPARSEFAILURE_TAG = "_xmlparsefailure"
87
+
70
88
  def register
71
89
  require "nokogiri"
72
90
  require "xmlsimple"
91
+ end
73
92
 
74
- end # def register
75
-
76
- public
77
93
  def filter(event)
78
-
79
94
  matched = false
80
95
 
81
- @logger.debug("Running xml filter", :event => event)
82
-
83
- return unless event.include?(@source)
96
+ @logger.debug? && @logger.debug("Running xml filter", :event => event)
84
97
 
85
98
  value = event[@source]
99
+ return unless value
86
100
 
87
- if value.is_a?(Array) && value.length > 1
88
- @logger.warn("XML filter only works on fields of length 1",
89
- :source => @source, :value => value)
101
+ if value.is_a?(Array)
102
+ if value.length != 1
103
+ event.tag(XMLPARSEFAILURE_TAG)
104
+ @logger.warn("XML filter expects single item array", :source => @source, :value => value)
105
+ return
106
+ end
107
+
108
+ value = value.first
109
+ end
110
+
111
+ unless value.is_a?(String)
112
+ event.tag(XMLPARSEFAILURE_TAG)
113
+ @logger.warn("XML filter expects a string but received a #{value.class}", :source => @source, :value => value)
90
114
  return
91
115
  end
92
116
 
93
117
  # Do nothing with an empty string.
94
- return if value.strip.length == 0
118
+ return if value.strip.empty?
95
119
 
96
120
  if @xpath
97
121
  begin
98
122
  doc = Nokogiri::XML(value, nil, value.encoding.to_s)
99
123
  rescue => e
100
- event.tag("_xmlparsefailure")
101
- @logger.warn("Trouble parsing xml", :source => @source, :value => value,
102
- :exception => e, :backtrace => e.backtrace)
124
+ event.tag(XMLPARSEFAILURE_TAG)
125
+ @logger.warn("Error parsing xml", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
103
126
  return
104
127
  end
105
128
  doc.remove_namespaces! if @remove_namespaces
129
+
106
130
  @xpath.each do |xpath_src, xpath_dest|
107
- nodeset = doc.xpath(xpath_src)
131
+ nodeset = @namespaces.empty? ? doc.xpath(xpath_src) : doc.xpath(xpath_src, @namespaces)
108
132
 
109
133
  # If asking xpath for a String, like "name(/*)", we get back a
110
134
  # String instead of a NodeSet. We normalize that here.
@@ -112,32 +136,39 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
112
136
 
113
137
  normalized_nodeset.each do |value|
114
138
  # some XPath functions return empty arrays as string
115
- if value.is_a?(Array)
116
- return if value.length == 0
117
- end
139
+ # TODO: (colin) the return statement here feels like a bug and should probably be a next ?
140
+ return if value.is_a?(Array) && value.length == 0
118
141
 
119
- unless value.nil?
142
+ if value
120
143
  matched = true
121
- event[xpath_dest] ||= []
122
- event[xpath_dest] << value.to_s
144
+ # TODO: (colin) this can probably be optimized to avoid the Event get/set at every loop iteration and
145
+ # the array should probably be created once, filled in the loop and set after the loop but the return
146
+ # statement above screws this strategy and is likely a bug anyway so I will not touch this until I can
147
+ # dig a bit deeper and verify there is a sufficient test harness to refactor this.
148
+ data = event[xpath_dest] || []
149
+ data << value.to_s
150
+ event[xpath_dest] = data
151
+
152
+ # do not use the following construct to set the event, we cannot assume anymore that the field values are in-place mutable
153
+ # event[xpath_dest] ||= []
154
+ # event[xpath_dest] << value.to_s
123
155
  end
124
- end # XPath.each
125
- end # @xpath.each
126
- end # if @xpath
156
+ end
157
+ end
158
+ end
127
159
 
128
160
  if @store_xml
129
161
  begin
130
162
  event[@target] = XmlSimple.xml_in(value)
131
163
  matched = true
132
164
  rescue => e
133
- event.tag("_xmlparsefailure")
134
- @logger.warn("Trouble parsing xml with XmlSimple", :source => @source,
135
- :value => value, :exception => e, :backtrace => e.backtrace)
165
+ event.tag(XMLPARSEFAILURE_TAG)
166
+ @logger.warn("Error parsing xml with XmlSimple", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
136
167
  return
137
168
  end
138
- end # if @store_xml
169
+ end
139
170
 
140
171
  filter_matched(event) if matched
141
- @logger.debug("Event after xml filter", :event => event)
142
- end # def filter
143
- end # class LogStash::Filters::Xml
172
+ @logger.debug? && @logger.debug("Event after xml filter", :event => event)
173
+ end
174
+ end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-xml'
4
- s.version = '2.0.2'
4
+ s.version = '2.1.1'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Takes a field that contains XML and expands it into an actual datastructure."
7
7
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -31,6 +31,36 @@ describe LogStash::Filters::Xml do
31
31
  insist { subject["data"]} == {"key1" => [{"key2" => ["value"]}]}
32
32
  end
33
33
 
34
+ # parse xml in single item array
35
+ sample("raw" => ["<foo bar=\"baz\"/>"]) do
36
+ insist { subject["tags"] }.nil?
37
+ insist { subject["data"]} == {"bar" => "baz"}
38
+ end
39
+
40
+ # fail in multi items array
41
+ sample("raw" => ["<foo bar=\"baz\"/>", "jojoba"]) do
42
+ insist { subject["tags"] }.include?("_xmlparsefailure")
43
+ insist { subject["data"]} == nil
44
+ end
45
+
46
+ # fail in empty array
47
+ sample("raw" => []) do
48
+ insist { subject["tags"] }.include?("_xmlparsefailure")
49
+ insist { subject["data"]} == nil
50
+ end
51
+
52
+ # fail for non string field
53
+ sample("raw" => {"foo" => "bar"}) do
54
+ insist { subject["tags"] }.include?("_xmlparsefailure")
55
+ insist { subject["data"]} == nil
56
+ end
57
+
58
+ # fail for non string single item array
59
+ sample("raw" => [{"foo" => "bar"}]) do
60
+ insist { subject["tags"] }.include?("_xmlparsefailure")
61
+ insist { subject["data"]} == nil
62
+ end
63
+
34
64
  #From bad xml
35
65
  sample("raw" => '<foo /') do
36
66
  insist { subject["tags"] }.include?("_xmlparsefailure")
@@ -189,6 +219,42 @@ describe LogStash::Filters::Xml do
189
219
  end
190
220
  end
191
221
 
222
+ describe "parse including namespaces declarations on root" do
223
+ config <<-CONFIG
224
+ filter {
225
+ xml {
226
+ source => "xmldata"
227
+ xpath => [ "/foo/h:div", "xpath_field" ]
228
+ namespaces => {"h" => "http://www.w3.org/TR/html4/"}
229
+ remove_namespaces => false
230
+ }
231
+ }
232
+ CONFIG
233
+
234
+ # Single value
235
+ sample("xmldata" => '<foo xmlns:h="http://www.w3.org/TR/html4/"><h:div>Content</h:div></foo>') do
236
+ insist { subject["xpath_field"] } == ["<h:div>Content</h:div>"]
237
+ end
238
+ end
239
+
240
+ describe "parse including namespaces declarations on child" do
241
+ config <<-CONFIG
242
+ filter {
243
+ xml {
244
+ source => "xmldata"
245
+ xpath => [ "/foo/h:div", "xpath_field" ]
246
+ namespaces => {"h" => "http://www.w3.org/TR/html4/"}
247
+ remove_namespaces => false
248
+ }
249
+ }
250
+ CONFIG
251
+
252
+ # Single value
253
+ sample("xmldata" => '<foo><h:div xmlns:h="http://www.w3.org/TR/html4/">Content</h:div></foo>') do
254
+ insist { subject["xpath_field"] } == ["<h:div xmlns:h=\"http://www.w3.org/TR/html4/\">Content</h:div>"]
255
+ end
256
+ end
257
+
192
258
  describe "parse removing namespaces" do
193
259
  config <<-CONFIG
194
260
  filter {
metadata CHANGED
@@ -1,17 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-xml
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 2.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-14 00:00:00.000000000 Z
11
+ date: 2015-12-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- requirement: !ruby/object:Gem::Requirement
14
+ name: logstash-core
15
+ version_requirements: !ruby/object:Gem::Requirement
15
16
  requirements:
16
17
  - - '>='
17
18
  - !ruby/object:Gem::Version
@@ -19,10 +20,7 @@ dependencies:
19
20
  - - <
20
21
  - !ruby/object:Gem::Version
21
22
  version: 3.0.0
22
- name: logstash-core
23
- prerelease: false
24
- type: :runtime
25
- version_requirements: !ruby/object:Gem::Requirement
23
+ requirement: !ruby/object:Gem::Requirement
26
24
  requirements:
27
25
  - - '>='
28
26
  - !ruby/object:Gem::Version
@@ -30,48 +28,50 @@ dependencies:
30
28
  - - <
31
29
  - !ruby/object:Gem::Version
32
30
  version: 3.0.0
31
+ prerelease: false
32
+ type: :runtime
33
33
  - !ruby/object:Gem::Dependency
34
+ name: nokogiri
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
34
40
  requirement: !ruby/object:Gem::Requirement
35
41
  requirements:
36
42
  - - '>='
37
43
  - !ruby/object:Gem::Version
38
44
  version: '0'
39
- name: nokogiri
40
45
  prerelease: false
41
46
  type: :runtime
47
+ - !ruby/object:Gem::Dependency
48
+ name: xml-simple
42
49
  version_requirements: !ruby/object:Gem::Requirement
43
50
  requirements:
44
51
  - - '>='
45
52
  - !ruby/object:Gem::Version
46
53
  version: '0'
47
- - !ruby/object:Gem::Dependency
48
54
  requirement: !ruby/object:Gem::Requirement
49
55
  requirements:
50
56
  - - '>='
51
57
  - !ruby/object:Gem::Version
52
58
  version: '0'
53
- name: xml-simple
54
59
  prerelease: false
55
60
  type: :runtime
61
+ - !ruby/object:Gem::Dependency
62
+ name: logstash-devutils
56
63
  version_requirements: !ruby/object:Gem::Requirement
57
64
  requirements:
58
65
  - - '>='
59
66
  - !ruby/object:Gem::Version
60
67
  version: '0'
61
- - !ruby/object:Gem::Dependency
62
68
  requirement: !ruby/object:Gem::Requirement
63
69
  requirements:
64
70
  - - '>='
65
71
  - !ruby/object:Gem::Version
66
72
  version: '0'
67
- name: logstash-devutils
68
73
  prerelease: false
69
74
  type: :development
70
- version_requirements: !ruby/object:Gem::Requirement
71
- requirements:
72
- - - '>='
73
- - !ruby/object:Gem::Version
74
- version: '0'
75
75
  description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
76
76
  email: info@elastic.co
77
77
  executables: []