logstash-filter-xml 2.0.2 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5eeac68d6edb559c401be45588545472d0f7251c
4
- data.tar.gz: 31b715339f8138873c3d9d40680fb163c8747589
3
+ metadata.gz: 0af48f46dcce1e66c87380592f9b490d0c377aae
4
+ data.tar.gz: 41ebeb840603ae50e82be8ef300d1de47c681a2f
5
5
  SHA512:
6
- metadata.gz: 76065a921e5de7e614e934849a8e1264b7086c2a64a5d2ff75c233c050b576e5def73ef0b860c25f4731effb83f9ad02e89319f9f1307e60be1a8c88d214a8ae
7
- data.tar.gz: 371c85df1aa6ae51f00f172b6056a58e190d7a9b93fcc461c808193faf762578bcc3d1791996267a374145c5a1b9de591b3240378e3460fec6ac6bfaf2ac18bd
6
+ metadata.gz: 64f80c71aa98dd82c1b846799b23aa5402518c7886059c0d01dde2789fe982076bcebce470dbeb2d7fd869e14dfa530a7185114b5a7532de4715614ef64e50ea
7
+ data.tar.gz: 03005e58d2645fbfdab2826a962a6ba7fbd9a4f2a8f48bebe822ae4a5faf3c06c57c67e8135a77178ca36fff44f395ff5a0932c013d3cc79fa425b12ab89a716
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
+ ## 2.1.1
2
+ - Refactored field references, code cleanups
3
+
4
+ ## 2.1.0
5
+ - Support for namespace declarations to use when parsing the XML document
6
+
1
7
  ## 2.0.0
2
- - Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully,
8
+ - Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully,
3
9
  instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895
4
10
  - Dependency on logstash-core update to 2.0
5
-
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Logstash Plugin
2
2
 
3
+ [![Build
4
+ Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Filters/job/logstash-plugin-filter-xml-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Filters/job/logstash-plugin-filter-xml-unit/)
5
+
3
6
  This is a plugin for [Logstash](https://github.com/elastic/logstash).
4
7
 
5
8
  It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
@@ -62,49 +62,73 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
62
62
  # field as described above. Setting this to false will prevent that.
63
63
  config :store_xml, :validate => :boolean, :default => true
64
64
 
65
+ # By default only namespace declarations on the root element are considered.
66
+ # This option allows you to configure all namespace declarations used to parse the XML document.
67
+ #
68
+ # Example:
69
+ #
70
+ # [source,ruby]
71
+ # filter {
72
+ # xml {
73
+ # namespaces => {
74
+ # "xsl" => "http://www.w3.org/1999/XSL/Transform"
75
+ # "xhtml" => "http://www.w3.org/1999/xhtml"
76
+ # }
77
+ # }
78
+ # }
79
+ #
80
+ config :namespaces, :validate => :hash, :default => {}
81
+
65
82
  # Remove all namespaces from all nodes in the document.
66
83
  # Of course, if the document had nodes with the same names but different namespaces, they will now be ambiguous.
67
84
  config :remove_namespaces, :validate => :boolean, :default => false
68
85
 
69
- public
86
+ XMLPARSEFAILURE_TAG = "_xmlparsefailure"
87
+
70
88
  def register
71
89
  require "nokogiri"
72
90
  require "xmlsimple"
91
+ end
73
92
 
74
- end # def register
75
-
76
- public
77
93
  def filter(event)
78
-
79
94
  matched = false
80
95
 
81
- @logger.debug("Running xml filter", :event => event)
82
-
83
- return unless event.include?(@source)
96
+ @logger.debug? && @logger.debug("Running xml filter", :event => event)
84
97
 
85
98
  value = event[@source]
99
+ return unless value
86
100
 
87
- if value.is_a?(Array) && value.length > 1
88
- @logger.warn("XML filter only works on fields of length 1",
89
- :source => @source, :value => value)
101
+ if value.is_a?(Array)
102
+ if value.length != 1
103
+ event.tag(XMLPARSEFAILURE_TAG)
104
+ @logger.warn("XML filter expects single item array", :source => @source, :value => value)
105
+ return
106
+ end
107
+
108
+ value = value.first
109
+ end
110
+
111
+ unless value.is_a?(String)
112
+ event.tag(XMLPARSEFAILURE_TAG)
113
+ @logger.warn("XML filter expects a string but received a #{value.class}", :source => @source, :value => value)
90
114
  return
91
115
  end
92
116
 
93
117
  # Do nothing with an empty string.
94
- return if value.strip.length == 0
118
+ return if value.strip.empty?
95
119
 
96
120
  if @xpath
97
121
  begin
98
122
  doc = Nokogiri::XML(value, nil, value.encoding.to_s)
99
123
  rescue => e
100
- event.tag("_xmlparsefailure")
101
- @logger.warn("Trouble parsing xml", :source => @source, :value => value,
102
- :exception => e, :backtrace => e.backtrace)
124
+ event.tag(XMLPARSEFAILURE_TAG)
125
+ @logger.warn("Error parsing xml", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
103
126
  return
104
127
  end
105
128
  doc.remove_namespaces! if @remove_namespaces
129
+
106
130
  @xpath.each do |xpath_src, xpath_dest|
107
- nodeset = doc.xpath(xpath_src)
131
+ nodeset = @namespaces.empty? ? doc.xpath(xpath_src) : doc.xpath(xpath_src, @namespaces)
108
132
 
109
133
  # If asking xpath for a String, like "name(/*)", we get back a
110
134
  # String instead of a NodeSet. We normalize that here.
@@ -112,32 +136,39 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
112
136
 
113
137
  normalized_nodeset.each do |value|
114
138
  # some XPath functions return empty arrays as string
115
- if value.is_a?(Array)
116
- return if value.length == 0
117
- end
139
+ # TODO: (colin) the return statement here feels like a bug and should probably be a next ?
140
+ return if value.is_a?(Array) && value.length == 0
118
141
 
119
- unless value.nil?
142
+ if value
120
143
  matched = true
121
- event[xpath_dest] ||= []
122
- event[xpath_dest] << value.to_s
144
+ # TODO: (colin) this can probably be optimized to avoid the Event get/set at every loop iteration and
145
+ # the array should probably be created once, filled in the loop and set after the loop but the return
146
+ # statement above screws this strategy and is likely a bug anyway so I will not touch this until I can
147
+ # dig a bit deeper and verify there is a sufficient test harness to refactor this.
148
+ data = event[xpath_dest] || []
149
+ data << value.to_s
150
+ event[xpath_dest] = data
151
+
152
+ # do not use the following construct to set the event; we can no longer assume that the field values are in-place mutable
153
+ # event[xpath_dest] ||= []
154
+ # event[xpath_dest] << value.to_s
123
155
  end
124
- end # XPath.each
125
- end # @xpath.each
126
- end # if @xpath
156
+ end
157
+ end
158
+ end
127
159
 
128
160
  if @store_xml
129
161
  begin
130
162
  event[@target] = XmlSimple.xml_in(value)
131
163
  matched = true
132
164
  rescue => e
133
- event.tag("_xmlparsefailure")
134
- @logger.warn("Trouble parsing xml with XmlSimple", :source => @source,
135
- :value => value, :exception => e, :backtrace => e.backtrace)
165
+ event.tag(XMLPARSEFAILURE_TAG)
166
+ @logger.warn("Error parsing xml with XmlSimple", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
136
167
  return
137
168
  end
138
- end # if @store_xml
169
+ end
139
170
 
140
171
  filter_matched(event) if matched
141
- @logger.debug("Event after xml filter", :event => event)
142
- end # def filter
143
- end # class LogStash::Filters::Xml
172
+ @logger.debug? && @logger.debug("Event after xml filter", :event => event)
173
+ end
174
+ end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-xml'
4
- s.version = '2.0.2'
4
+ s.version = '2.1.1'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Takes a field that contains XML and expands it into an actual datastructure."
7
7
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -31,6 +31,36 @@ describe LogStash::Filters::Xml do
31
31
  insist { subject["data"]} == {"key1" => [{"key2" => ["value"]}]}
32
32
  end
33
33
 
34
+ # parse xml in single item array
35
+ sample("raw" => ["<foo bar=\"baz\"/>"]) do
36
+ insist { subject["tags"] }.nil?
37
+ insist { subject["data"]} == {"bar" => "baz"}
38
+ end
39
+
40
+ # fail in multi items array
41
+ sample("raw" => ["<foo bar=\"baz\"/>", "jojoba"]) do
42
+ insist { subject["tags"] }.include?("_xmlparsefailure")
43
+ insist { subject["data"]} == nil
44
+ end
45
+
46
+ # fail in empty array
47
+ sample("raw" => []) do
48
+ insist { subject["tags"] }.include?("_xmlparsefailure")
49
+ insist { subject["data"]} == nil
50
+ end
51
+
52
+ # fail for non string field
53
+ sample("raw" => {"foo" => "bar"}) do
54
+ insist { subject["tags"] }.include?("_xmlparsefailure")
55
+ insist { subject["data"]} == nil
56
+ end
57
+
58
+ # fail for non string single item array
59
+ sample("raw" => [{"foo" => "bar"}]) do
60
+ insist { subject["tags"] }.include?("_xmlparsefailure")
61
+ insist { subject["data"]} == nil
62
+ end
63
+
34
64
  #From bad xml
35
65
  sample("raw" => '<foo /') do
36
66
  insist { subject["tags"] }.include?("_xmlparsefailure")
@@ -189,6 +219,42 @@ describe LogStash::Filters::Xml do
189
219
  end
190
220
  end
191
221
 
222
+ describe "parse including namespaces declarations on root" do
223
+ config <<-CONFIG
224
+ filter {
225
+ xml {
226
+ source => "xmldata"
227
+ xpath => [ "/foo/h:div", "xpath_field" ]
228
+ namespaces => {"h" => "http://www.w3.org/TR/html4/"}
229
+ remove_namespaces => false
230
+ }
231
+ }
232
+ CONFIG
233
+
234
+ # Single value
235
+ sample("xmldata" => '<foo xmlns:h="http://www.w3.org/TR/html4/"><h:div>Content</h:div></foo>') do
236
+ insist { subject["xpath_field"] } == ["<h:div>Content</h:div>"]
237
+ end
238
+ end
239
+
240
+ describe "parse including namespaces declarations on child" do
241
+ config <<-CONFIG
242
+ filter {
243
+ xml {
244
+ source => "xmldata"
245
+ xpath => [ "/foo/h:div", "xpath_field" ]
246
+ namespaces => {"h" => "http://www.w3.org/TR/html4/"}
247
+ remove_namespaces => false
248
+ }
249
+ }
250
+ CONFIG
251
+
252
+ # Single value
253
+ sample("xmldata" => '<foo><h:div xmlns:h="http://www.w3.org/TR/html4/">Content</h:div></foo>') do
254
+ insist { subject["xpath_field"] } == ["<h:div xmlns:h=\"http://www.w3.org/TR/html4/\">Content</h:div>"]
255
+ end
256
+ end
257
+
192
258
  describe "parse removing namespaces" do
193
259
  config <<-CONFIG
194
260
  filter {
metadata CHANGED
@@ -1,17 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-xml
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 2.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-14 00:00:00.000000000 Z
11
+ date: 2015-12-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- requirement: !ruby/object:Gem::Requirement
14
+ name: logstash-core
15
+ version_requirements: !ruby/object:Gem::Requirement
15
16
  requirements:
16
17
  - - '>='
17
18
  - !ruby/object:Gem::Version
@@ -19,10 +20,7 @@ dependencies:
19
20
  - - <
20
21
  - !ruby/object:Gem::Version
21
22
  version: 3.0.0
22
- name: logstash-core
23
- prerelease: false
24
- type: :runtime
25
- version_requirements: !ruby/object:Gem::Requirement
23
+ requirement: !ruby/object:Gem::Requirement
26
24
  requirements:
27
25
  - - '>='
28
26
  - !ruby/object:Gem::Version
@@ -30,48 +28,50 @@ dependencies:
30
28
  - - <
31
29
  - !ruby/object:Gem::Version
32
30
  version: 3.0.0
31
+ prerelease: false
32
+ type: :runtime
33
33
  - !ruby/object:Gem::Dependency
34
+ name: nokogiri
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
34
40
  requirement: !ruby/object:Gem::Requirement
35
41
  requirements:
36
42
  - - '>='
37
43
  - !ruby/object:Gem::Version
38
44
  version: '0'
39
- name: nokogiri
40
45
  prerelease: false
41
46
  type: :runtime
47
+ - !ruby/object:Gem::Dependency
48
+ name: xml-simple
42
49
  version_requirements: !ruby/object:Gem::Requirement
43
50
  requirements:
44
51
  - - '>='
45
52
  - !ruby/object:Gem::Version
46
53
  version: '0'
47
- - !ruby/object:Gem::Dependency
48
54
  requirement: !ruby/object:Gem::Requirement
49
55
  requirements:
50
56
  - - '>='
51
57
  - !ruby/object:Gem::Version
52
58
  version: '0'
53
- name: xml-simple
54
59
  prerelease: false
55
60
  type: :runtime
61
+ - !ruby/object:Gem::Dependency
62
+ name: logstash-devutils
56
63
  version_requirements: !ruby/object:Gem::Requirement
57
64
  requirements:
58
65
  - - '>='
59
66
  - !ruby/object:Gem::Version
60
67
  version: '0'
61
- - !ruby/object:Gem::Dependency
62
68
  requirement: !ruby/object:Gem::Requirement
63
69
  requirements:
64
70
  - - '>='
65
71
  - !ruby/object:Gem::Version
66
72
  version: '0'
67
- name: logstash-devutils
68
73
  prerelease: false
69
74
  type: :development
70
- version_requirements: !ruby/object:Gem::Requirement
71
- requirements:
72
- - - '>='
73
- - !ruby/object:Gem::Version
74
- version: '0'
75
75
  description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
76
76
  email: info@elastic.co
77
77
  executables: []