logstash-filter-xml 4.0.7 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/docs/index.asciidoc +12 -0
- data/lib/logstash/filters/xml.rb +33 -1
- data/logstash-filter-xml.gemspec +1 -1
- data/spec/filters/xml_spec.rb +59 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e533534aa762b35aa38226b37adfda6d03678657070439da36c3bd639a4947a5
|
4
|
+
data.tar.gz: f8010acbb8c698a6809748562301d1dc74d90396a5fc7a32cc4f23005201e8a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5dd55d1f420614894a97eb5e4848e1ef9302cb5879edad959d96c2b79855fa22928d62539585731451bc4da6771e2c97d8ff8bec7f157a0639f0f98afba532a
|
7
|
+
data.tar.gz: 8af5b8843765191ddce68ee5fcd79d55d201ecfedf404612873ca462c0a4d83eac376acbec6c4a3eb52a309dfa0d1095279b7bdfe50021b5d060702192b7c76d
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
## 4.1.0
|
2
|
+
- Feat: added parse_options for more control over XML parsing [#68](https://github.com/logstash-plugins/logstash-filter-xml/pull/68)
|
3
|
+
|
1
4
|
## 4.0.7
|
2
5
|
- Fixed creation of empty arrays when xpath failed [#59](https://github.com/logstash-plugins/logstash-filter-xml/pull/59)
|
3
6
|
|
data/docs/index.asciidoc
CHANGED
@@ -34,6 +34,7 @@ This plugin supports the following configuration options plus the <<plugins-{typ
|
|
34
34
|
| <<plugins-{type}s-{plugin}-force_array>> |<<boolean,boolean>>|No
|
35
35
|
| <<plugins-{type}s-{plugin}-force_content>> |<<boolean,boolean>>|No
|
36
36
|
| <<plugins-{type}s-{plugin}-namespaces>> |<<hash,hash>>|No
|
37
|
+
| <<plugins-{type}s-{plugin}-parse_options>> |<<string,string>>|No
|
37
38
|
| <<plugins-{type}s-{plugin}-remove_namespaces>> |<<boolean,boolean>>|No
|
38
39
|
| <<plugins-{type}s-{plugin}-source>> |<<string,string>>|Yes
|
39
40
|
| <<plugins-{type}s-{plugin}-store_xml>> |<<boolean,boolean>>|No
|
@@ -87,6 +88,17 @@ filter {
|
|
87
88
|
}
|
88
89
|
}
|
89
90
|
|
91
|
+
[id="plugins-{type}s-{plugin}-parse_options"]
|
92
|
+
===== `parse_options`
|
93
|
+
|
94
|
+
* Value type is <<string,string>>
|
95
|
+
* There is no default value for this setting.
|
96
|
+
|
97
|
+
Setting XML parser options allows for more control of the parsing process.
|
98
|
+
By default the parser is not strict and thus accepts some invalid content.
|
99
|
+
Currently supported options are:
|
100
|
+
|
101
|
+
- `strict` - forces the parser to fail early instead of accumulating errors when content is not valid xml.
|
90
102
|
|
91
103
|
[id="plugins-{type}s-{plugin}-remove_namespaces"]
|
92
104
|
===== `remove_namespaces`
|
data/lib/logstash/filters/xml.rb
CHANGED
@@ -58,6 +58,13 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
|
|
58
58
|
#
|
59
59
|
config :xpath, :validate => :hash, :default => {}
|
60
60
|
|
61
|
+
# Supported XML parsing options are 'strict', 'no_error' and 'no_warning'.
|
62
|
+
# - strict mode turns on strict parsing rules (non-compliant xml will fail)
|
63
|
+
# - no_error and no_warning can be used to suppress errors/warnings
|
64
|
+
config :parse_options, :validate => :string
|
65
|
+
# NOTE: technically we support more but we purposefully do not document those.
|
66
|
+
# e.g. setting "strict|recover" will not turn on strict as they're conflicting
|
67
|
+
|
61
68
|
# By default the filter will store the whole parsed XML in the destination
|
62
69
|
# field as described above. Setting this to false will prevent that.
|
63
70
|
config :store_xml, :validate => :boolean, :default => true
|
@@ -110,6 +117,7 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
|
|
110
117
|
:error => "When the 'store_xml' configuration option is true, 'target' must also be set"
|
111
118
|
)
|
112
119
|
end
|
120
|
+
xml_parse_options # validates parse_options => ...
|
113
121
|
end
|
114
122
|
|
115
123
|
def filter(event)
|
@@ -141,11 +149,13 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
|
|
141
149
|
|
142
150
|
if @xpath
|
143
151
|
begin
|
144
|
-
doc = Nokogiri::XML(value, nil, value.encoding.to_s)
|
152
|
+
doc = Nokogiri::XML::Document.parse(value, nil, value.encoding.to_s, xml_parse_options)
|
145
153
|
rescue => e
|
146
154
|
event.tag(XMLPARSEFAILURE_TAG)
|
147
155
|
@logger.warn("Error parsing xml", :source => @source, :value => value, :exception => e, :backtrace => e.backtrace)
|
148
156
|
return
|
157
|
+
else
|
158
|
+
doc.errors.any? && @logger.debug? && @logger.debug("Parsed xml with #{doc.errors.size} errors")
|
149
159
|
end
|
150
160
|
doc.remove_namespaces! if @remove_namespaces
|
151
161
|
|
@@ -194,4 +204,26 @@ class LogStash::Filters::Xml < LogStash::Filters::Base
|
|
194
204
|
filter_matched(event) if matched
|
195
205
|
@logger.debug? && @logger.debug("Event after xml filter", :event => event)
|
196
206
|
end
|
207
|
+
|
208
|
+
private
|
209
|
+
|
210
|
+
def xml_parse_options
|
211
|
+
return Nokogiri::XML::ParseOptions::DEFAULT_XML unless @parse_options # (RECOVER | NONET)
|
212
|
+
@xml_parse_options ||= begin
|
213
|
+
parse_options = @parse_options.split(/,|\|/).map do |opt|
|
214
|
+
name = opt.strip.tr('_', '').upcase
|
215
|
+
if name.empty?
|
216
|
+
nil
|
217
|
+
else
|
218
|
+
begin
|
219
|
+
Nokogiri::XML::ParseOptions.const_get(name)
|
220
|
+
rescue NameError
|
221
|
+
raise LogStash::ConfigurationError, "unsupported parse option: #{opt.inspect}"
|
222
|
+
end
|
223
|
+
end
|
224
|
+
end
|
225
|
+
parse_options.compact.inject(0, :|) # e.g. NOERROR | NOWARNING
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
197
229
|
end
|
data/logstash-filter-xml.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-xml'
|
4
|
-
s.version = '4.0.7'
|
4
|
+
s.version = '4.1.0'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "Parses XML into fields"
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
data/spec/filters/xml_spec.rb
CHANGED
@@ -418,4 +418,63 @@ describe LogStash::Filters::Xml do
|
|
418
418
|
end
|
419
419
|
end
|
420
420
|
end
|
421
|
+
|
422
|
+
describe "parsing invalid xml" do
|
423
|
+
subject { described_class.new(options) }
|
424
|
+
let(:options) { ({ 'source' => 'xmldata', 'store_xml' => false }) }
|
425
|
+
let(:xmldata) { "<xml> <sample attr='foo' attr=\"bar\"> <invalid> </sample> </xml>" }
|
426
|
+
let(:event) { LogStash::Event.new(data) }
|
427
|
+
let(:data) { { "xmldata" => xmldata } }
|
428
|
+
|
429
|
+
before { subject.register }
|
430
|
+
after { subject.close }
|
431
|
+
|
432
|
+
it 'does not fail (by default)' do
|
433
|
+
subject.filter(event)
|
434
|
+
expect( event.get("tags") ).to be nil
|
435
|
+
end
|
436
|
+
|
437
|
+
context 'strict option' do
|
438
|
+
let(:options) { super().merge({ 'parse_options' => 'strict' }) } # explicit super(): `let` is built on define_method, where implicit-argument super is unsupported
|
439
|
+
|
440
|
+
it 'does fail parsing' do
|
441
|
+
subject.filter(event)
|
442
|
+
expect( event.get("tags") ).to_not be nil
|
443
|
+
expect( event.get("tags") ).to include '_xmlparsefailure'
|
444
|
+
end
|
445
|
+
end
|
446
|
+
end
|
447
|
+
|
448
|
+
describe "parse_options" do
|
449
|
+
subject { described_class.new(options) }
|
450
|
+
let(:options) { ({ 'source' => 'xmldata', 'store_xml' => false, 'parse_options' => parse_options }) }
|
451
|
+
|
452
|
+
context 'strict (supported option)' do
|
453
|
+
let(:parse_options) { 'strict' }
|
454
|
+
|
455
|
+
it 'registers filter' do
|
456
|
+
subject.register
|
457
|
+
expect( subject.send(:xml_parse_options) ).
|
458
|
+
to eql Nokogiri::XML::ParseOptions::STRICT
|
459
|
+
end
|
460
|
+
end
|
461
|
+
|
462
|
+
context 'valid' do
|
463
|
+
let(:parse_options) { 'no_error,NOWARNING' }
|
464
|
+
|
465
|
+
it 'registers filter' do
|
466
|
+
subject.register
|
467
|
+
expect( subject.send(:xml_parse_options) ).
|
468
|
+
to eql Nokogiri::XML::ParseOptions::NOERROR | Nokogiri::XML::ParseOptions::NOWARNING
|
469
|
+
end
|
470
|
+
end
|
471
|
+
|
472
|
+
context 'invalid' do
|
473
|
+
let(:parse_options) { 'strict,invalid0' }
|
474
|
+
|
475
|
+
it 'fails to register' do
|
476
|
+
expect { subject.register }.to raise_error(LogStash::ConfigurationError, 'unsupported parse option: "invalid0"')
|
477
|
+
end
|
478
|
+
end
|
479
|
+
end
|
421
480
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-xml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.7
|
4
|
+
version: 4.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-01-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|