logstash-filter-xml 4.0.7 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5752546d28d84f25aba5b8938f3554668455b2ba53a20bb2d9d555a32339bc21
4
- data.tar.gz: 6f35d6fcf1915907399f5c3254a4f555ded7e3e8e80e3118691ea4aba1d3b922
3
+ metadata.gz: e533534aa762b35aa38226b37adfda6d03678657070439da36c3bd639a4947a5
4
+ data.tar.gz: f8010acbb8c698a6809748562301d1dc74d90396a5fc7a32cc4f23005201e8a5
5
5
  SHA512:
6
- metadata.gz: b567fab011de36e7976dc9939b3d4ae06933d2c5ff56a54c95794c8d25a0f81e29d6aa2b22f459203dd9f9f3f5c30b9e5d750d96aec047f2ca990c66c5d553ea
7
- data.tar.gz: 3e5b8fedd5ee99177cf1462487100572aa8a4620985ba89bdf263e0d836b74381735bb919c9892d66d7f09f4323317c85f1e69f7f7a7597770799048cd1059a2
6
+ metadata.gz: e5dd55d1f420614894a97eb5e4848e1ef9302cb5879edad959d96c2b79855fa22928d62539585731451bc4da6771e2c97d8ff8bec7f157a0639f0f98afba532a
7
+ data.tar.gz: 8af5b8843765191ddce68ee5fcd79d55d201ecfedf404612873ca462c0a4d83eac376acbec6c4a3eb52a309dfa0d1095279b7bdfe50021b5d060702192b7c76d
@@ -1,3 +1,6 @@
1
+ ## 4.1.0
2
+ - Feat: added parse_options for more control over XML parsing [#68](https://github.com/logstash-plugins/logstash-filter-xml/pull/68)
3
+
1
4
  ## 4.0.7
2
5
  - Fixed creation of empty arrays when xpath failed [#59](https://github.com/logstash-plugins/logstash-filter-xml/pull/59)
3
6
 
@@ -34,6 +34,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
34
34
  | <<plugins-{type}s-{plugin}-force_array>> |<<boolean,boolean>>|No
35
35
  | <<plugins-{type}s-{plugin}-force_content>> |<<boolean,boolean>>|No
36
36
  | <<plugins-{type}s-{plugin}-namespaces>> |<<hash,hash>>|No
37
+ | <<plugins-{type}s-{plugin}-parser_options>> |<<string,string>>|No
37
38
  | <<plugins-{type}s-{plugin}-remove_namespaces>> |<<boolean,boolean>>|No
38
39
  | <<plugins-{type}s-{plugin}-source>> |<<string,string>>|Yes
39
40
  | <<plugins-{type}s-{plugin}-store_xml>> |<<boolean,boolean>>|No
@@ -87,6 +88,17 @@ filter {
87
88
  }
88
89
  }
89
90
 
91
+ [id="plugins-{type}s-{plugin}-parser_options"]
92
+ ===== `parse_options`
93
+
94
+ * Value type is <<string,string>>
95
+ * There is no default value for this setting.
96
+
97
+ Setting XML parser options allows for more control of the parsing process.
98
+ By default the parser is not strict and thus accepts some invalid content.
99
+ Currently supported options are:
100
+
101
+ - `strict` - forces the parser to fail early instead of accumulating errors when content is not valid XML.
90
102
 
91
103
  [id="plugins-{type}s-{plugin}-remove_namespaces"]
92
104
  ===== `remove_namespaces`
@@ -58,6 +58,13 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
58
58
  #
59
59
  config :xpath, :validate => :hash, :default => {}
60
60
 
61
+ # Supported XML parsing options are 'strict', 'no_error' and 'no_warning'.
62
+ # - strict mode turns on strict parsing rules (non-compliant xml will fail)
63
+ # - no_error and no_warning can be used to suppress errors/warnings
64
+ config :parse_options, :validate => :string
65
+ # NOTE: technically we support more but we purposefully do not document those.
66
+ # e.g. setting "strict|recover" will not turn on strict as they're conflicting
67
+
61
68
  # By default the filter will store the whole parsed XML in the destination
62
69
  # field as described above. Setting this to false will prevent that.
63
70
  config :store_xml, :validate => :boolean, :default => true
@@ -110,6 +117,7 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
110
117
  :error => "When the 'store_xml' configuration option is true, 'target' must also be set"
111
118
  )
112
119
  end
120
+ xml_parse_options # validates parse_options => ...
113
121
  end
114
122
 
115
123
  def filter(event)
@@ -141,11 +149,13 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
141
149
 
142
150
  if @xpath
143
151
  begin
144
- doc = Nokogiri::XML(value, nil, value.encoding.to_s)
152
+ doc = Nokogiri::XML::Document.parse(value, nil, value.encoding.to_s, xml_parse_options)
145
153
  rescue => e
146
154
  event.tag(XMLPARSEFAILURE_TAG)
147
155
  @logger.warn("Error parsing xml", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
148
156
  return
157
+ else
158
+ doc.errors.any? && @logger.debug? && @logger.debug("Parsed xml with #{doc.errors.size} errors")
149
159
  end
150
160
  doc.remove_namespaces! if @remove_namespaces
151
161
 
@@ -194,4 +204,26 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
194
204
  filter_matched(event) if matched
195
205
  @logger.debug? && @logger.debug("Event after xml filter", :event => event)
196
206
  end
207
+
208
+ private
209
+
210
+ def xml_parse_options
211
+ return Nokogiri::XML::ParseOptions::DEFAULT_XML unless @parse_options # (RECOVER | NONET)
212
+ @xml_parse_options ||= begin
213
+ parse_options = @parse_options.split(/,|\|/).map do |opt|
214
+ name = opt.strip.tr('_', '').upcase
215
+ if name.empty?
216
+ nil
217
+ else
218
+ begin
219
+ Nokogiri::XML::ParseOptions.const_get(name)
220
+ rescue NameError
221
+ raise LogStash::ConfigurationError, "unsupported parse option: #{opt.inspect}"
222
+ end
223
+ end
224
+ end
225
+ parse_options.compact.inject(0, :|) # e.g. NOERROR | NOWARNING
226
+ end
227
+ end
228
+
197
229
  end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-xml'
4
- s.version = '4.0.7'
4
+ s.version = '4.1.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Parses XML into fields"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -418,4 +418,63 @@ describe LogStash::Filters::Xml do
418
418
  end
419
419
  end
420
420
  end
421
+
422
+ describe "parsing invalid xml" do
423
+ subject { described_class.new(options) }
424
+ let(:options) { ({ 'source' => 'xmldata', 'store_xml' => false }) }
425
+ let(:xmldata) { "<xml> <sample attr='foo' attr=\"bar\"> <invalid> </sample> </xml>" }
426
+ let(:event) { LogStash::Event.new(data) }
427
+ let(:data) { { "xmldata" => xmldata } }
428
+
429
+ before { subject.register }
430
+ after { subject.close }
431
+
432
+ it 'does not fail (by default)' do
433
+ subject.filter(event)
434
+ expect( event.get("tags") ).to be nil
435
+ end
436
+
437
+ context 'strict option' do
438
+ let(:options) { super.merge({ 'parse_options' => 'strict' }) }
439
+
440
+ it 'does fail parsing' do
441
+ subject.filter(event)
442
+ expect( event.get("tags") ).to_not be nil
443
+ expect( event.get("tags") ).to include '_xmlparsefailure'
444
+ end
445
+ end
446
+ end
447
+
448
+ describe "parse_options" do
449
+ subject { described_class.new(options) }
450
+ let(:options) { ({ 'source' => 'xmldata', 'store_xml' => false, 'parse_options' => parse_options }) }
451
+
452
+ context 'strict (supported option)' do
453
+ let(:parse_options) { 'strict' }
454
+
455
+ it 'registers filter' do
456
+ subject.register
457
+ expect( subject.send(:xml_parse_options) ).
458
+ to eql Nokogiri::XML::ParseOptions::STRICT
459
+ end
460
+ end
461
+
462
+ context 'valid' do
463
+ let(:parse_options) { 'no_error,NOWARNING' }
464
+
465
+ it 'registers filter' do
466
+ subject.register
467
+ expect( subject.send(:xml_parse_options) ).
468
+ to eql Nokogiri::XML::ParseOptions::NOERROR | Nokogiri::XML::ParseOptions::NOWARNING
469
+ end
470
+ end
471
+
472
+ context 'invalid' do
473
+ let(:parse_options) { 'strict,invalid0' }
474
+
475
+ it 'fails to register' do
476
+ expect { subject.register }.to raise_error(LogStash::ConfigurationError, 'unsupported parse option: "invalid0"')
477
+ end
478
+ end
479
+ end
421
480
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-xml
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.7
4
+ version: 4.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-02-27 00:00:00.000000000 Z
11
+ date: 2020-01-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement