logstash-filter-xml 4.0.7 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5752546d28d84f25aba5b8938f3554668455b2ba53a20bb2d9d555a32339bc21
4
- data.tar.gz: 6f35d6fcf1915907399f5c3254a4f555ded7e3e8e80e3118691ea4aba1d3b922
3
+ metadata.gz: e533534aa762b35aa38226b37adfda6d03678657070439da36c3bd639a4947a5
4
+ data.tar.gz: f8010acbb8c698a6809748562301d1dc74d90396a5fc7a32cc4f23005201e8a5
5
5
  SHA512:
6
- metadata.gz: b567fab011de36e7976dc9939b3d4ae06933d2c5ff56a54c95794c8d25a0f81e29d6aa2b22f459203dd9f9f3f5c30b9e5d750d96aec047f2ca990c66c5d553ea
7
- data.tar.gz: 3e5b8fedd5ee99177cf1462487100572aa8a4620985ba89bdf263e0d836b74381735bb919c9892d66d7f09f4323317c85f1e69f7f7a7597770799048cd1059a2
6
+ metadata.gz: e5dd55d1f420614894a97eb5e4848e1ef9302cb5879edad959d96c2b79855fa22928d62539585731451bc4da6771e2c97d8ff8bec7f157a0639f0f98afba532a
7
+ data.tar.gz: 8af5b8843765191ddce68ee5fcd79d55d201ecfedf404612873ca462c0a4d83eac376acbec6c4a3eb52a309dfa0d1095279b7bdfe50021b5d060702192b7c76d
@@ -1,3 +1,6 @@
1
+ ## 4.1.0
2
+ - Feat: added `parse_options` for more control over XML parsing [#68](https://github.com/logstash-plugins/logstash-filter-xml/pull/68)
3
+
1
4
  ## 4.0.7
2
5
  - Fixed creation of empty arrays when xpath failed [#59](https://github.com/logstash-plugins/logstash-filter-xml/pull/59)
3
6
 
@@ -34,6 +34,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
34
34
  | <<plugins-{type}s-{plugin}-force_array>> |<<boolean,boolean>>|No
35
35
  | <<plugins-{type}s-{plugin}-force_content>> |<<boolean,boolean>>|No
36
36
  | <<plugins-{type}s-{plugin}-namespaces>> |<<hash,hash>>|No
37
+ | <<plugins-{type}s-{plugin}-parser_options>> |<<string,string>>|No
37
38
  | <<plugins-{type}s-{plugin}-remove_namespaces>> |<<boolean,boolean>>|No
38
39
  | <<plugins-{type}s-{plugin}-source>> |<<string,string>>|Yes
39
40
  | <<plugins-{type}s-{plugin}-store_xml>> |<<boolean,boolean>>|No
@@ -87,6 +88,17 @@ filter {
87
88
  }
88
89
  }
89
90
 
91
+ [id="plugins-{type}s-{plugin}-parser_options"]
92
+ ===== `parse_options`
93
+
94
+ * Value type is <<string,string>>
95
+ * There is no default value for this setting.
96
+
97
+ Setting XML parser options allows for more control of the parsing process.
98
+ By default the parser is not strict and thus accepts some invalid content.
99
+ Currently supported options are:
100
+
101
+ - `strict` - forces the parser to fail early instead of accumulating errors when content is not valid XML.
90
102
 
91
103
  [id="plugins-{type}s-{plugin}-remove_namespaces"]
92
104
  ===== `remove_namespaces`
@@ -58,6 +58,13 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
58
58
  #
59
59
  config :xpath, :validate => :hash, :default => {}
60
60
 
61
+ # Supported XML parsing options are 'strict', 'no_error' and 'no_warning'.
62
+ # - strict mode turns on strict parsing rules (non-compliant xml will fail)
63
+ # - no_error and no_warning can be used to suppress errors/warnings
64
+ config :parse_options, :validate => :string
65
+ # NOTE: technically we support more but we purposefully do not document those.
66
+ # e.g. setting "strict|recover" will not turn on strict as they're conflicting
67
+
61
68
  # By default the filter will store the whole parsed XML in the destination
62
69
  # field as described above. Setting this to false will prevent that.
63
70
  config :store_xml, :validate => :boolean, :default => true
@@ -110,6 +117,7 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
110
117
  :error => "When the 'store_xml' configuration option is true, 'target' must also be set"
111
118
  )
112
119
  end
120
+ xml_parse_options # validates parse_options => ...
113
121
  end
114
122
 
115
123
  def filter(event)
@@ -141,11 +149,13 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
141
149
 
142
150
  if @xpath
143
151
  begin
144
- doc = Nokogiri::XML(value, nil, value.encoding.to_s)
152
+ doc = Nokogiri::XML::Document.parse(value, nil, value.encoding.to_s, xml_parse_options)
145
153
  rescue => e
146
154
  event.tag(XMLPARSEFAILURE_TAG)
147
155
  @logger.warn("Error parsing xml", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
148
156
  return
157
+ else
158
+ doc.errors.any? && @logger.debug? && @logger.debug("Parsed xml with #{doc.errors.size} errors")
149
159
  end
150
160
  doc.remove_namespaces! if @remove_namespaces
151
161
 
@@ -194,4 +204,26 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
194
204
  filter_matched(event) if matched
195
205
  @logger.debug? && @logger.debug("Event after xml filter", :event => event)
196
206
  end
207
+
208
+ private
209
+
210
+ def xml_parse_options
211
+ return Nokogiri::XML::ParseOptions::DEFAULT_XML unless @parse_options # (RECOVER | NONET)
212
+ @xml_parse_options ||= begin
213
+ parse_options = @parse_options.split(/,|\|/).map do |opt|
214
+ name = opt.strip.tr('_', '').upcase
215
+ if name.empty?
216
+ nil
217
+ else
218
+ begin
219
+ Nokogiri::XML::ParseOptions.const_get(name)
220
+ rescue NameError
221
+ raise LogStash::ConfigurationError, "unsupported parse option: #{opt.inspect}"
222
+ end
223
+ end
224
+ end
225
+ parse_options.compact.inject(0, :|) # e.g. NOERROR | NOWARNING
226
+ end
227
+ end
228
+
197
229
  end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-xml'
4
- s.version = '4.0.7'
4
+ s.version = '4.1.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Parses XML into fields"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -418,4 +418,63 @@ describe LogStash::Filters::Xml do
418
418
  end
419
419
  end
420
420
  end
421
+
422
+ describe "parsing invalid xml" do
423
+ subject { described_class.new(options) }
424
+ let(:options) { ({ 'source' => 'xmldata', 'store_xml' => false }) }
425
+ let(:xmldata) { "<xml> <sample attr='foo' attr=\"bar\"> <invalid> </sample> </xml>" }
426
+ let(:event) { LogStash::Event.new(data) }
427
+ let(:data) { { "xmldata" => xmldata } }
428
+
429
+ before { subject.register }
430
+ after { subject.close }
431
+
432
+ it 'does not fail (by default)' do
433
+ subject.filter(event)
434
+ expect( event.get("tags") ).to be nil
435
+ end
436
+
437
+ context 'strict option' do
438
+ let(:options) { super.merge({ 'parse_options' => 'strict' }) }
439
+
440
+ it 'does fail parsing' do
441
+ subject.filter(event)
442
+ expect( event.get("tags") ).to_not be nil
443
+ expect( event.get("tags") ).to include '_xmlparsefailure'
444
+ end
445
+ end
446
+ end
447
+
448
+ describe "parse_options" do
449
+ subject { described_class.new(options) }
450
+ let(:options) { ({ 'source' => 'xmldata', 'store_xml' => false, 'parse_options' => parse_options }) }
451
+
452
+ context 'strict (supported option)' do
453
+ let(:parse_options) { 'strict' }
454
+
455
+ it 'registers filter' do
456
+ subject.register
457
+ expect( subject.send(:xml_parse_options) ).
458
+ to eql Nokogiri::XML::ParseOptions::STRICT
459
+ end
460
+ end
461
+
462
+ context 'valid' do
463
+ let(:parse_options) { 'no_error,NOWARNING' }
464
+
465
+ it 'registers filter' do
466
+ subject.register
467
+ expect( subject.send(:xml_parse_options) ).
468
+ to eql Nokogiri::XML::ParseOptions::NOERROR | Nokogiri::XML::ParseOptions::NOWARNING
469
+ end
470
+ end
471
+
472
+ context 'invalid' do
473
+ let(:parse_options) { 'strict,invalid0' }
474
+
475
+ it 'fails to register' do
476
+ expect { subject.register }.to raise_error(LogStash::ConfigurationError, 'unsupported parse option: "invalid0"')
477
+ end
478
+ end
479
+ end
421
480
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-xml
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.7
4
+ version: 4.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-02-27 00:00:00.000000000 Z
11
+ date: 2020-01-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement