logstash-filter-xml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +6 -0
- data/lib/logstash/filters/xml.rb +139 -0
- data/logstash-filter-xml.gemspec +27 -0
- data/rakelib/publish.rake +9 -0
- data/rakelib/vendor.rake +169 -0
- data/spec/filters/xml_spec.rb +175 -0
- metadata +88 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
---
|
|
2
|
+
!binary "U0hBMQ==":
|
|
3
|
+
metadata.gz: !binary |-
|
|
4
|
+
NjBhYWFkYzM0NjM2ODNkNmQwYTg3YzRkNjc0ZmUzMDFhYjkzNTkzNg==
|
|
5
|
+
data.tar.gz: !binary |-
|
|
6
|
+
ZjM5NWZhMzczOTZkZTRmOTFiZmVjNDNhOWNiNDk2YjJjODc3Y2M3OQ==
|
|
7
|
+
SHA512:
|
|
8
|
+
metadata.gz: !binary |-
|
|
9
|
+
ODNkOTFiNDVhODEyNWI5NzcwYmJiNWU3ZmNjYTJmNDRmMWZkMGY0ZjAwODg1
|
|
10
|
+
ZDhhOGUzYTg5NmYxOTQ0ZjBjZTAzYzFiMDQ2ZjVmYmMwYTcyY2U1YmEzODE1
|
|
11
|
+
Mzc4OTkxY2JiMGI1YmJiMDgzMzIyMDg2ZWU0ZjVjYTdmZTFkNWU=
|
|
12
|
+
data.tar.gz: !binary |-
|
|
13
|
+
MmJlM2M0MTRhNDEwODU1Y2I2OWE5ZGIxYWMxZGZkMTRkMDllNjhlNWJlYTE3
|
|
14
|
+
NzQ4NmU0YTI5ZDkxMzY4NGJkZDgxODc4NTZmMGQ2ODkxMDdmMWU1YTE4N2Fj
|
|
15
|
+
MGMxYmJhN2FmNGMyY2ZhOGZmOGQzMTFlMjUwYmVjMzFkMTEwM2M=
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
require "logstash/filters/base"
|
|
3
|
+
require "logstash/namespace"
|
|
4
|
+
|
|
5
|
+
# XML filter. Takes a field that contains XML and expands it into
|
|
6
|
+
# an actual datastructure.
|
|
7
|
+
class LogStash::Filters::Xml < LogStash::Filters::Base

  config_name "xml"
  milestone 1

  # Config for xml to hash is:
  #
  #     source => source_field
  #
  # For example, if you have the whole xml document in your @message field:
  #
  #     filter {
  #       xml {
  #         source => "message"
  #       }
  #     }
  #
  # The above would parse the xml from the @message field
  config :source, :validate => :string

  # Define target for placing the data
  #
  # for example if you want the data to be put in the 'doc' field:
  #
  #     filter {
  #       xml {
  #         target => "doc"
  #       }
  #     }
  #
  # XML in the value of the source field will be expanded into a
  # datastructure in the "target" field.
  # Note: if the "target" field already exists, it will be overridden
  # Required
  config :target, :validate => :string

  # xpath will additionally select string values (.to_s on whatever is selected)
  # from parsed XML (using each source field defined using the method above)
  # and place those values in the destination fields. Configuration:
  #
  #     xpath => [ "xpath-syntax", "destination-field" ]
  #
  # Values returned by XPath parsing from xpath-syntax will be put in the
  # destination field. Multiple values returned will be pushed onto the
  # destination field as an array. As such, multiple matches across
  # multiple source fields will produce duplicate entries in the field
  #
  # More on xpath: http://www.w3schools.com/xpath/
  #
  # The xpath functions are particularly powerful:
  # http://www.w3schools.com/xpath/xpath_functions.asp
  #
  config :xpath, :validate => :hash, :default => {}

  # By default the filter will store the whole parsed xml in the destination
  # field as described above. Setting this to false will prevent that.
  config :store_xml, :validate => :boolean, :default => true

  # Loads the XML libraries used by #filter.
  public
  def register
    require "nokogiri"
    require "xmlsimple"
  end # def register

  # Expands the XML held in the source field of +event+.
  #
  # * Every configured xpath expression is evaluated and each result is
  #   appended (as a String) to its destination field.
  # * When store_xml is enabled, the XmlSimple structure of the document is
  #   written to the target field.
  #
  # Events whose source field is missing, blank, or an Array with more than
  # one element are left untouched. Parse errors tag the event with
  # "_xmlparsefailure". filter_matched is called when at least one value was
  # stored.
  public
  def filter(event)
    return unless filter?(event)
    matched = false

    @logger.debug("Running xml filter", :event => event)

    return unless event.include?(@source)

    value = event[@source]

    if value.is_a?(Array)
      if value.length > 1
        @logger.warn("XML filter only works on fields of length 1",
                     :source => @source, :value => value)
        return
      end
      # A one-element array is accepted: unwrap it so the String handling
      # below (strip, parsing) operates on the document, not on the Array.
      value = value.first
    end

    # Do nothing with a missing or empty string.
    return if value.nil? || value.strip.empty?

    if @xpath
      begin
        doc = Nokogiri::XML(value)
      rescue => e
        event.tag("_xmlparsefailure")
        @logger.warn("Trouble parsing xml", :source => @source, :value => value,
                     :exception => e, :backtrace => e.backtrace)
        return
      end

      @xpath.each do |xpath_src, xpath_dest|
        nodeset = doc.xpath(xpath_src)

        # If asking xpath for a String, like "name(/*)", we get back a
        # String instead of a NodeSet.  We normalize that here.
        normalized_nodeset = nodeset.kind_of?(Nokogiri::XML::NodeSet) ? nodeset : [nodeset]

        normalized_nodeset.each do |node|
          # Some XPath functions return empty arrays. Skip just that result:
          # a `return` here would abort the whole filter and silently drop
          # the store_xml step and the filter_matched call below.
          next if node.is_a?(Array) && node.empty?

          unless node.nil?
            matched = true
            event[xpath_dest] ||= []
            event[xpath_dest] << node.to_s
          end
        end # normalized_nodeset.each
      end # @xpath.each
    end # if @xpath

    if @store_xml
      begin
        event[@target] = XmlSimple.xml_in(value)
        matched = true
      rescue => e
        event.tag("_xmlparsefailure")
        @logger.warn("Trouble parsing xml with XmlSimple", :source => @source,
                     :value => value, :exception => e, :backtrace => e.backtrace)
        return
      end
    end # if @store_xml

    filter_matched(event) if matched
    @logger.debug("Event after xml filter", :event => event)
  end # def filter
end # class LogStash::Filters::Xml
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Gem specification for the logstash XML filter plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-filter-xml'
  s.version         = '0.1.0'
  s.licenses        = ['Apache License (2.0)']
  s.summary         = "Takes a field that contains XML and expands it into an actual datastructure."
  s.description     = "Takes a field that contains XML and expands it into an actual datastructure."
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths = ["lib"]

  # Files
  s.files = `git ls-files`.split($\)

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
  s.add_runtime_dependency 'nokogiri'
  # The filter requires "xmlsimple" when it registers; that library is
  # provided by the xml-simple gem, so declare it instead of relying on
  # another gem to pull it in.
  s.add_runtime_dependency 'xml-simple'

end
|
|
27
|
+
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
require "gem_publisher"
|
|
2
|
+
|
|
3
|
+
# Publishes the gem described by the first *.gemspec found one directory
# above this rake file, via GemPublisher.publish_if_updated.
desc "Publish gem to RubyGems.org"
task :publish_gem do |_task|
  rakelib_dir = File.dirname(__FILE__)
  gemspec_path = Dir.glob(File.expand_path('../*.gemspec', rakelib_dir)).first
  published = GemPublisher.publish_if_updated(gemspec_path, :rubygems)
  puts "Published #{published}" if published
end
|
|
9
|
+
|
data/rakelib/vendor.rake
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
require "net/http"
|
|
2
|
+
require "uri"
|
|
3
|
+
require "digest/sha1"
|
|
4
|
+
|
|
5
|
+
# Joins the given path components underneath the "vendor" directory.
def vendor(*args)
  File.join(*(["vendor"] + args))
end
|
|
8
|
+
|
|
9
|
+
# Directory task for "vendor/" with "vendor" as its prerequisite; the action
# creates the directory named by the task.
directory("vendor/" => ["vendor"]) { |dir_task, _args| mkdir dir_task.name }
|
|
12
|
+
|
|
13
|
+
# Downloads +url+ into +output+ and checks the digest reported by #download
# against +sha1+. Fails with a RuntimeError when the two differ.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  downloaded_sha1 = download(url, output)
  return if downloaded_sha1 == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{downloaded_sha1}')"
end # def fetch
|
|
22
|
+
|
|
23
|
+
# Ensures vendor/<basename of url> exists with the expected SHA1.
#
# A task named after the output file (prerequisite: "vendor/") is defined and
# invoked immediately; it downloads the file when it is missing or when its
# current digest differs from +sha1+.
#
# url  - String URL of the file to fetch.
# sha1 - expected SHA1 hex digest.
#
# Returns the output path ("vendor/<filename>").
def file_fetch(url, sha1)
  filename = File.basename( URI(url).path )
  output = "vendor/#{filename}"
  task output => [ "vendor/" ] do
    # Only the digest lookup is guarded: a missing file simply means a
    # download is needed. Keeping fetch outside the rescue prevents an
    # ENOENT raised during the download from triggering a second download.
    stale = begin
      file_sha1(output) != sha1
    rescue Errno::ENOENT
      true
    end
    fetch(url, sha1, output) if stale
  end.invoke

  return output
end
|
|
39
|
+
|
|
40
|
+
# Computes the SHA1 hex digest of the file at +path+.
#
# Uses Digest::SHA1.file rather than a hand-rolled text-mode read loop, so
# the bytes hashed are the bytes on disk.
#
# Raises Errno::ENOENT when the file does not exist (file_fetch relies on it).
def file_sha1(path)
  Digest::SHA1.file(path).hexdigest
end
|
|
54
|
+
|
|
55
|
+
# Downloads +url+ to +output+, streaming through "<output>.tmp" and renaming
# on success, while computing the SHA1 of the received bytes.
#
# url    - String http/https URL.
# output - destination path.
#
# Returns the SHA1 hex digest of the downloaded body.
# Raises RuntimeError when the response status is not 200 or 301, and
# re-raises SocketError after printing a message. The temporary file is
# removed in every case.
def download(url, output)
  uri = URI(url)
  digest = Digest::SHA1.new
  tmp = "#{output}.tmp"
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string (uri.path alone would drop it).
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # response.code is a String, so compare against strings, and fail when
      # the status is NOT one of the accepted codes.
      fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
      # A missing Content-Length yields 0.0, which disables the progress display.
      size = response["content-length"].to_f
      count = 0
      # Binary mode: the payload must be written byte-for-byte.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count / size * 100))
          end
        end
      end
      $stdout.write("\r \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  # tmp is nil if URI(url) itself raised; do not mask that error.
  File.unlink(tmp) if tmp && File.exist?(tmp)
end # def download
|
|
88
|
+
|
|
89
|
+
# Extracts entries from the gzipped +tarball+.
#
# The block is called with each archive entry and returns the destination
# path for it, or nil to skip the entry. Regular files whose destination
# already exists with the same size and permission bits are not rewritten.
# The tarball is deleted once every entry has been processed.
def untar(tarball, &block)
  require "zlib"
  require "archive/tar/minitar"
  tgz = Zlib::GzipReader.new(File.open(tarball, "rb"))
  tar = nil
  begin
    tar = Archive::Tar::Minitar::Input.open(tgz)
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)

      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
      else
        entry_mode = entry.instance_eval { @mode } & 0777
        # File.exist? (File.exists? was removed in Ruby 3.2).
        if File.exist?(path)
          stat = File.stat(path)
          # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
          # expose headers in the entry.
          entry_size = entry.instance_eval { @size }
          # If file sizes and modes are the same, skip writing.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end
        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        # Binary mode: archive members are written byte-for-byte.
        File.open(path, "wb") do |fd|
          # eof? check lets us skip empty files. Necessary because the API provided by
          # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
          # IO object. Something about empty files in this EntryStream causes
          # IO.copy_stream to throw "can't convert nil into String" on JRuby
          # TODO(sissel): File a bug about this.
          while !entry.eof?
            chunk = entry.read(16384)
            fd.write(chunk)
          end
          #IO.copy_stream(entry, fd)
        end
        File.chmod(entry_mode, path)
      end
    end
  ensure
    # Release the handles even when extraction fails part-way.
    tar.close if tar
    tgz.close unless tgz.closed?
  end
  File.unlink(tarball) if File.file?(tarball)
end # def untar
|
|
133
|
+
|
|
134
|
+
# Decompresses the gzip file +file+ next to itself and deletes the original.
#
# file - path ending in ".gz"; only that trailing extension is stripped to
#        form the output name, so ".gz" elsewhere in the path is untouched.
#
# Returns the path of the decompressed file.
# Raises ArgumentError when +file+ does not end in ".gz" (the output would
# otherwise overwrite or misname the input). On any failure during
# decompression the partial output is removed and the error re-raised.
def ungz(file)
  require "zlib"

  outpath = file.sub(/\.gz\z/, '')
  raise ArgumentError, "not a .gz file: #{file}" if outpath == file

  begin
    # Block forms close both handles even when copying fails.
    Zlib::GzipReader.open(file) do |gz|
      File.open(outpath, "wb") do |out|
        IO::copy_stream(gz, out)
      end
    end
    File.unlink(file)
  rescue
    File.unlink(outpath) if File.file?(outpath)
    raise
  end
  outpath
end
|
|
149
|
+
|
|
150
|
+
# Fetches every entry of @files (hashes with 'url', 'sha1' and an optional
# 'files' list) and unpacks it under vendor/:
#   * *.tar.gz archives are extracted; when 'files' is given only the listed
#     members are extracted, flattened to their basename, otherwise every
#     member is extracted under its archive path.
#   * other *.gz files are decompressed in place.
desc "Process any vendor files required for this plugin"
task "vendor" do |task, args|

  # @files is not set in this file; treat a missing list as nothing to vendor.
  (@files || []).each do |file|
    download = file_fetch(file['url'], file['sha1'])
    # Dots are escaped and the match is anchored to the end of the name.
    if download =~ /\.tar\.gz\z/
      prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
      untar(download) do |entry|
        if file['files'].nil?
          # No filter: keep the member's own path (previously `out` stayed
          # nil here and File.join raised TypeError).
          out = entry.full_name
        else
          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
          out = entry.full_name.split("/").last
        end
        File.join('vendor', out)
      end
    elsif download =~ /\.gz\z/
      ungz(download)
    end
  end

end
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
require "spec_helper"
|
|
3
|
+
require "logstash/filters/xml"
|
|
4
|
+
|
|
5
|
+
describe LogStash::Filters::Xml do

  # The first three scenario groups run twice: once reading from the "raw"
  # field (the "Deprecated checks") and once from the "xmldata" field (the
  # new tests). Only the source field name and the description suffix differ.
  [["raw", " (Deprecated checks)"], ["xmldata", ""]].each do |field, suffix|

    describe "parse standard xml#{suffix}" do
      config <<-CONFIG
      filter {
        xml {
          source => "#{field}"
          target => "data"
        }
      }
      CONFIG

      sample(field => '<foo key="value"/>') do
        insist { subject["tags"] }.nil?
        insist { subject["data"]} == {"key" => "value"}
      end

      #From parse xml with array as a value
      sample(field => '<foo><key>value1</key><key>value2</key></foo>') do
        insist { subject["tags"] }.nil?
        insist { subject["data"]} == {"key" => ["value1", "value2"]}
      end

      #From parse xml with hash as a value
      sample(field => '<foo><key1><key2>value</key2></key1></foo>') do
        insist { subject["tags"] }.nil?
        insist { subject["data"]} == {"key1" => [{"key2" => ["value"]}]}
      end

      #From bad xml
      sample(field => '<foo /') do
        insist { subject["tags"] }.include?("_xmlparsefailure")
      end
    end

    describe "parse standard xml but do not store#{suffix}" do
      config <<-CONFIG
      filter {
        xml {
          source => "#{field}"
          target => "data"
          store_xml => false
        }
      }
      CONFIG

      sample(field => '<foo key="value"/>') do
        insist { subject["tags"] }.nil?
        insist { subject["data"]} == nil
      end
    end

    describe "parse xml and store values with xpath#{suffix}" do
      config <<-CONFIG
      filter {
        xml {
          source => "#{field}"
          target => "data"
          xpath => [ "/foo/key/text()", "xpath_field" ]
        }
      }
      CONFIG

      # Single value
      sample(field => '<foo><key>value</key></foo>') do
        insist { subject["tags"] }.nil?
        insist { subject["xpath_field"]} == ["value"]
      end

      #Multiple values
      sample(field => '<foo><key>value1</key><key>value2</key></foo>') do
        insist { subject["tags"] }.nil?
        insist { subject["xpath_field"]} == ["value1","value2"]
      end
    end

  end

  describe "parse correctly non ascii content with xpath" do
    config <<-CONFIG
    filter {
      xml {
        source => "xmldata"
        target => "data"
        xpath => [ "/foo/key/text()", "xpath_field" ]
      }
    }
    CONFIG

    # Single value
    sample("xmldata" => '<foo><key>Français</key></foo>') do
      insist { subject["tags"] }.nil?
      insist { subject["xpath_field"]} == ["Français"]
    end
  end

end
|
metadata
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: logstash-filter-xml
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Elasticsearch
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2014-10-25 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: logstash
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ! '>='
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: 1.4.0
|
|
20
|
+
- - <
|
|
21
|
+
- !ruby/object:Gem::Version
|
|
22
|
+
version: 2.0.0
|
|
23
|
+
type: :runtime
|
|
24
|
+
prerelease: false
|
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
26
|
+
requirements:
|
|
27
|
+
- - ! '>='
|
|
28
|
+
- !ruby/object:Gem::Version
|
|
29
|
+
version: 1.4.0
|
|
30
|
+
- - <
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: 2.0.0
|
|
33
|
+
- !ruby/object:Gem::Dependency
|
|
34
|
+
name: nokogiri
|
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ! '>='
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '0'
|
|
40
|
+
type: :runtime
|
|
41
|
+
prerelease: false
|
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ! '>='
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: '0'
|
|
47
|
+
description: Takes a field that contains XML and expands it into an actual datastructure.
|
|
48
|
+
email: richard.pijnenburg@elasticsearch.com
|
|
49
|
+
executables: []
|
|
50
|
+
extensions: []
|
|
51
|
+
extra_rdoc_files: []
|
|
52
|
+
files:
|
|
53
|
+
- .gitignore
|
|
54
|
+
- Gemfile
|
|
55
|
+
- Rakefile
|
|
56
|
+
- lib/logstash/filters/xml.rb
|
|
57
|
+
- logstash-filter-xml.gemspec
|
|
58
|
+
- rakelib/publish.rake
|
|
59
|
+
- rakelib/vendor.rake
|
|
60
|
+
- spec/filters/xml_spec.rb
|
|
61
|
+
homepage: http://logstash.net/
|
|
62
|
+
licenses:
|
|
63
|
+
- Apache License (2.0)
|
|
64
|
+
metadata:
|
|
65
|
+
logstash_plugin: 'true'
|
|
66
|
+
group: filter
|
|
67
|
+
post_install_message:
|
|
68
|
+
rdoc_options: []
|
|
69
|
+
require_paths:
|
|
70
|
+
- lib
|
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - ! '>='
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '0'
|
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
|
+
requirements:
|
|
78
|
+
- - ! '>='
|
|
79
|
+
- !ruby/object:Gem::Version
|
|
80
|
+
version: '0'
|
|
81
|
+
requirements: []
|
|
82
|
+
rubyforge_project:
|
|
83
|
+
rubygems_version: 2.4.1
|
|
84
|
+
signing_key:
|
|
85
|
+
specification_version: 4
|
|
86
|
+
summary: Takes a field that contains XML and expands it into an actual datastructure.
|
|
87
|
+
test_files:
|
|
88
|
+
- spec/filters/xml_spec.rb
|