logstash-filter-xml 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NjBhYWFkYzM0NjM2ODNkNmQwYTg3YzRkNjc0ZmUzMDFhYjkzNTkzNg==
5
+ data.tar.gz: !binary |-
6
+ ZjM5NWZhMzczOTZkZTRmOTFiZmVjNDNhOWNiNDk2YjJjODc3Y2M3OQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODNkOTFiNDVhODEyNWI5NzcwYmJiNWU3ZmNjYTJmNDRmMWZkMGY0ZjAwODg1
10
+ ZDhhOGUzYTg5NmYxOTQ0ZjBjZTAzYzFiMDQ2ZjVmYmMwYTcyY2U1YmEzODE1
11
+ Mzc4OTkxY2JiMGI1YmJiMDgzMzIyMDg2ZWU0ZjVjYTdmZTFkNWU=
12
+ data.tar.gz: !binary |-
13
+ MmJlM2M0MTRhNDEwODU1Y2I2OWE5ZGIxYWMxZGZkMTRkMDllNjhlNWJlYTE3
14
+ NzQ4NmU0YTI5ZDkxMzY4NGJkZDgxODc4NTZmMGQ2ODkxMDdmMWU1YTE4N2Fj
15
+ MGMxYmJhN2FmNGMyY2ZhOGZmOGQzMTFlMjUwYmVjMzFkMTEwM2M=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Development-time dependencies for the Rakefile and the rakelib tasks.
# Gems are fetched over HTTPS so they cannot be tampered with in transit.
source 'https://rubygems.org'
gem 'rake'
gem 'gem_publisher'        # required by rakelib/publish.rake
gem 'archive-tar-minitar'  # required by the untar helper in rakelib/vendor.rake
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
# Descriptors of third-party files the "vendor" task should download.
# The task reads the 'url', 'sha1' and 'files' keys of each entry.
# This plugin vendors nothing, so the list is empty.
@files = []

# Running a bare `rake` prints the list of available tasks.
task(:default) { system("rake -T") }
6
+
@@ -0,0 +1,139 @@
1
+ # encoding: utf-8
2
+ require "logstash/filters/base"
3
+ require "logstash/namespace"
4
+
5
# XML filter. Takes a field that contains XML and expands it into
# an actual datastructure.
class LogStash::Filters::Xml < LogStash::Filters::Base

  config_name "xml"
  milestone 1

  # Config for xml to hash is:
  #
  #     source => source_field
  #
  # For example, if you have the whole xml document in your @message field:
  #
  #     filter {
  #       xml {
  #         source => "message"
  #       }
  #     }
  #
  # The above would parse the xml from the @message field
  config :source, :validate => :string

  # Define target for placing the data
  #
  # for example if you want the data to be put in the 'doc' field:
  #
  #     filter {
  #       xml {
  #         target => "doc"
  #       }
  #     }
  #
  # XML in the value of the source field will be expanded into a
  # datastructure in the "target" field.
  # Note: if the "target" field already exists, it will be overridden
  # Required
  config :target, :validate => :string

  # xpath will additionally select string values (.to_s on whatever is selected)
  # from parsed XML (using each source field defined using the method above)
  # and place those values in the destination fields. Configuration:
  #
  #     xpath => [ "xpath-syntax", "destination-field" ]
  #
  # Values returned by XPath parsing from xpath-syntax will be put in the
  # destination field. Multiple values returned will be pushed onto the
  # destination field as an array. As such, multiple matches across
  # multiple source fields will produce duplicate entries in the field
  #
  # More on xpath: http://www.w3schools.com/xpath/
  #
  # The xpath functions are particularly powerful:
  # http://www.w3schools.com/xpath/xpath_functions.asp
  #
  config :xpath, :validate => :hash, :default => {}

  # By default the filter will store the whole parsed xml in the destination
  # field as described above. Setting this to false will prevent that.
  config :store_xml, :validate => :boolean, :default => true

  public
  # Loads the XML libraries used by #filter. They are required here rather
  # than at file load time.
  def register
    require "nokogiri"
    require "xmlsimple"
  end # def register

  public
  # Parses the XML found in the configured source field of +event+.
  #
  # When xpath expressions are configured, every selected value is appended
  # (as a String) to the matching destination field. When store_xml is true,
  # the whole document is converted with XmlSimple and written to the target
  # field. On a parse error the event is tagged "_xmlparsefailure", a warning
  # is logged and processing of the event stops.
  #
  # The source value must be a String, or an Array holding exactly one
  # String. Anything else is logged and left untouched.
  def filter(event)
    return unless filter?(event)
    matched = false

    @logger.debug("Running xml filter", :event => event)

    return unless event.include?(@source)

    value = event[@source]

    if value.is_a?(Array)
      # Only a single-element array can be parsed. It is unwrapped here so
      # the String operations below do not blow up on the Array itself.
      if value.length != 1
        @logger.warn("XML filter only works on fields of length 1",
                     :source => @source, :value => value)
        return
      end
      value = value.first
    end

    unless value.is_a?(String)
      # Calling strip or the parsers on a non-String would raise.
      @logger.warn("XML filter expects a string value",
                   :source => @source, :value => value)
      return
    end

    # Do nothing with an empty string.
    return if value.strip.length == 0

    if @xpath
      begin
        doc = Nokogiri::XML(value)
      rescue => e
        event.tag("_xmlparsefailure")
        @logger.warn("Trouble parsing xml", :source => @source, :value => value,
                     :exception => e, :backtrace => e.backtrace)
        return
      end

      @xpath.each do |xpath_src, xpath_dest|
        nodeset = doc.xpath(xpath_src)

        # If asking xpath for a String, like "name(/*)", we get back a
        # String instead of a NodeSet. We normalize that here.
        normalized_nodeset = nodeset.kind_of?(Nokogiri::XML::NodeSet) ? nodeset : [nodeset]

        normalized_nodeset.each do |node|
          # some XPath functions return empty arrays as string
          # NOTE(review): this returns from the whole filter, skipping the
          # remaining xpath expressions and the store_xml step. A `next`
          # may have been intended; confirm before changing.
          return if node.is_a?(Array) && node.length == 0

          unless node.nil?
            matched = true
            event[xpath_dest] ||= []
            event[xpath_dest] << node.to_s
          end
        end # XPath.each
      end # @xpath.each
    end # if @xpath

    if @store_xml
      begin
        event[@target] = XmlSimple.xml_in(value)
        matched = true
      rescue => e
        event.tag("_xmlparsefailure")
        @logger.warn("Trouble parsing xml with XmlSimple", :source => @source,
                     :value => value, :exception => e, :backtrace => e.backtrace)
        return
      end
    end # if @store_xml

    filter_matched(event) if matched
    @logger.debug("Event after xml filter", :event => event)
  end # def filter
end # class LogStash::Filters::Xml
@@ -0,0 +1,27 @@
1
# Gem specification for the logstash XML filter plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-filter-xml'
  s.version         = '0.1.0'
  s.licenses        = ['Apache License (2.0)']
  s.summary         = "Takes a field that contains XML and expands it into an actual datastructure."
  s.description     = "Takes a field that contains XML and expands it into an actual datastructure."
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths   = ["lib"]

  # Files: everything tracked by git
  s.files = `git ls-files`.split($\)

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
  s.add_runtime_dependency 'nokogiri'
  # lib/logstash/filters/xml.rb requires "xmlsimple", which is provided by
  # the xml-simple gem. Declare it instead of relying on another gem to
  # pull it in.
  s.add_runtime_dependency 'xml-simple'

end
27
+
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
desc "Publish gem to RubyGems.org"
task :publish_gem do |_task|
  # Look for the gemspec one directory above the one holding this rake file.
  parent_glob = File.expand_path('../*.gemspec', File.dirname(__FILE__))
  gemspec_path = Dir.glob(parent_glob).first

  # publish_if_updated's result is only reported when it is truthy.
  published = GemPublisher.publish_if_updated(gemspec_path, :rubygems)
  puts "Published #{published}" if published
end
9
+
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
# Builds a path below the "vendor" directory.
#
# args - zero or more path segments to append after "vendor".
#
# Returns the joined path as a String ("vendor" when no segment is given).
def vendor(*args)
  File.join("vendor", *args)
end
8
+
9
# Rake directory rule for "vendor/", declared with the "vendor" task as its
# prerequisite. Its action creates the directory named by the task.
directory "vendor/" => ["vendor"] do |dir_task, _args|
  mkdir dir_task.name
end
12
+
13
# Downloads +url+ into +output+ and checks the SHA1 of the received bytes.
#
# url    - address to download from.
# sha1   - expected hex digest of the content.
# output - path the download is written to.
#
# Returns nil when the digest matches. Fails with a RuntimeError when it
# does not; the downloaded file is already in place at +output+ by then,
# because download moves it there before returning.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  received_sha1 = download(url, output)
  return if received_sha1 == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{received_sha1}')"
end # def fetch
22
+
23
# Makes sure the file behind +url+ is present under vendor/ with the
# expected checksum, downloading it when needed.
#
# A rake task named after the output path is defined and invoked
# immediately. It re-downloads when the existing file's SHA1 differs from
# +sha1+ or when the file does not exist yet.
#
# url  - address of the file; its basename becomes the local file name.
# sha1 - expected hex digest.
#
# Returns the local path ("vendor/<basename>").
def file_fetch(url, sha1)
  basename = File.basename(URI(url).path)
  output = "vendor/#{basename}"

  fetch_task = task(output => ["vendor/"]) do
    begin
      fetch(url, sha1, output) unless file_sha1(output) == sha1
    rescue Errno::ENOENT
      # Nothing on disk yet: download from scratch.
      fetch(url, sha1, output)
    end
  end
  fetch_task.invoke

  output
end
39
+
40
# Computes the SHA1 of a file, reading it in 16 KiB chunks so large files
# are never loaded into memory at once.
#
# path - file to hash.
#
# Returns the hex digest String. Errors from opening the file (for example
# Errno::ENOENT for a missing file) propagate to the caller.
def file_sha1(path)
  sha1 = Digest::SHA1.new
  File.open(path, "r") do |io|
    # IO#read returns nil at end of file, which ends the loop.
    while (chunk = io.read(16384))
      sha1 << chunk
    end
  end
  sha1.hexdigest
end
54
+
55
# Streams +url+ to +output+ and returns the SHA1 of the received body.
#
# The body is written to "<output>.tmp" first and renamed to +output+ only
# after the transfer finished, so a partial download never replaces an
# existing file. A progress percentage is printed when stdout is a terminal
# and the server sent a Content-Length.
#
# url    - http or https address to fetch (the query string is sent too).
# output - destination path.
#
# Returns the hex SHA1 digest String of the downloaded bytes.
# Fails with a RuntimeError when the response status is not 200 or 301;
# a SocketError is reported on stdout and re-raised.
def download(url, output)
  # Assigned first so the ensure clause can always refer to it.
  tmp = "#{output}.tmp"
  uri = URI(url)
  digest = Digest::SHA1.new
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string and yields "/" for an empty path.
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # response.code is a String, so compare numerically. Redirects are
      # not followed here; the caller's checksum comparison catches a 301
      # body that is not the expected file.
      unless [200, 301].include?(response.code.to_i)
        fail "HTTP fetch failed for #{url}. #{response}"
      end
      # A missing header gives nil.to_i == 0, which disables the progress output.
      size = response["content-length"].to_i.to_f
      count = 0
      # Binary mode: the payload must be stored byte for byte.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  # Remove the partial file if the transfer did not complete.
  File.unlink(tmp) if File.exist?(tmp)
end # def download
88
+
89
# Extracts entries of a gzipped tarball, letting the caller decide where
# each entry goes, then deletes the tarball.
#
# tarball - path of the .tar.gz file.
# block   - called with every archive entry; returns the destination path
#           for that entry, or nil to skip it.
#
# A file whose destination already exists with the same size and permission
# bits is not rewritten. The archive is closed even when extraction fails;
# the tarball itself is removed only after a successful run.
def untar(tarball, &block)
  require "zlib"
  require "archive/tar/minitar"
  tgz = Zlib::GzipReader.new(File.open(tarball, "rb"))
  tar = nil
  begin
    tar = Archive::Tar::Minitar::Input.open(tgz)
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)

      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
      else
        # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
        # expose headers in the entry.
        entry_mode = entry.instance_eval { @mode } & 0777
        # File.exist? rather than the deprecated File.exists?, which newer
        # Rubies no longer provide.
        if File.exist?(path)
          stat = File.stat(path)
          entry_size = entry.instance_eval { @size }
          # If size and mode are the same, skip writing.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end
        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        File.open(path, "wb") do |fd|
          # eof? check lets us skip empty files. Necessary because the API provided by
          # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
          # IO object. Something about empty files in this EntryStream causes
          # IO.copy_stream to throw "can't convert nil into String" on JRuby
          # TODO(sissel): File a bug about this.
          until entry.eof?
            chunk = entry.read(16384)
            fd.write(chunk)
          end
        end
        File.chmod(entry_mode, path)
      end
    end
  ensure
    # Release the underlying file handle on success and on failure alike.
    # When the tar reader was never created, close the gzip stream directly.
    tar ? tar.close : tgz.close
  end
  File.unlink(tarball) if File.file?(tarball)
end # def untar
133
+
134
# Decompresses a gzip file next to itself and removes the compressed file.
#
# file - path of the gzip file. A trailing ".gz" is stripped to form the
#        output path. For a name that does not end in ".gz", every ".gz"
#        occurrence is removed instead, as before.
#
# Returns the path of the decompressed file.
# Raises ArgumentError when no output path distinct from +file+ can be
# derived; writing would otherwise clobber the input. Any error during
# decompression removes the partial output and is re-raised, leaving the
# compressed file untouched.
def ungz(file)
  require "zlib"

  # Only the extension is stripped, so a ".gz" inside a directory name
  # (for example "cache.gz/data.txt.gz") is left alone.
  outpath = file.end_with?(".gz") ? file[0...-3] : file.gsub('.gz', '')
  raise ArgumentError, "cannot derive an output path from #{file.inspect}" if outpath == file

  begin
    # The block form closes the reader and its file on every exit path.
    Zlib::GzipReader.open(file) do |gz|
      File.open(outpath, "wb") do |out|
        IO.copy_stream(gz, out)
      end
    end
    File.unlink(file)
  rescue
    File.unlink(outpath) if File.file?(outpath)
    raise
  end

  outpath
end
149
+
150
desc "Process any vendor files required for this plugin"
# Downloads every entry of @files and unpacks it under vendor/.
# Each entry is a hash with 'url' and 'sha1' keys, plus an optional 'files'
# list that restricts which tarball members are extracted.
task "vendor" do |task, args|

  # Tolerate a Rakefile that never assigned @files.
  (@files || []).each do |file|
    archive = file_fetch(file['url'], file['sha1'])
    # Dots are escaped so only a literal ".tar.gz" / ".gz" matches.
    if archive =~ /\.tar\.gz/
      prefix = archive.gsub('.tar.gz', '').gsub('vendor/', '')
      untar(archive) do |entry|
        unless file['files'].nil?
          # Returning nil (via next) tells untar to skip this entry.
          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
        end
        # Entries are flattened into vendor/ by their base name.
        # NOTE(review): without a 'files' filter the destination used to be
        # nil and File.join raised TypeError. Flattening mirrors the
        # filtered branch; confirm this is the desired layout.
        File.join('vendor', entry.full_name.split("/").last)
      end
    elsif archive =~ /\.gz/
      ungz(archive)
    end
  end

end
@@ -0,0 +1,175 @@
1
+ # encoding: utf-8
2
+ require "spec_helper"
3
+ require "logstash/filters/xml"
4
+
5
describe LogStash::Filters::Xml do

  # The same three scenarios run twice. The first pass reads from the "raw"
  # field (the "Deprecated checks" groups), the second from "xmldata".
  [["raw", " (Deprecated checks)"], ["xmldata", ""]].each do |field, suffix|

    describe "parse standard xml#{suffix}" do
      config <<-CONFIG
      filter {
        xml {
          source => "#{field}"
          target => "data"
        }
      }
      CONFIG

      sample(field => '<foo key="value"/>') do
        insist { subject["tags"] }.nil?
        insist { subject["data"]} == {"key" => "value"}
      end

      #From parse xml with array as a value
      sample(field => '<foo><key>value1</key><key>value2</key></foo>') do
        insist { subject["tags"] }.nil?
        insist { subject["data"]} == {"key" => ["value1", "value2"]}
      end

      #From parse xml with hash as a value
      sample(field => '<foo><key1><key2>value</key2></key1></foo>') do
        insist { subject["tags"] }.nil?
        insist { subject["data"]} == {"key1" => [{"key2" => ["value"]}]}
      end

      #From bad xml
      sample(field => '<foo /') do
        insist { subject["tags"] }.include?("_xmlparsefailure")
      end
    end

    describe "parse standard xml but do not store#{suffix}" do
      config <<-CONFIG
      filter {
        xml {
          source => "#{field}"
          target => "data"
          store_xml => false
        }
      }
      CONFIG

      sample(field => '<foo key="value"/>') do
        insist { subject["tags"] }.nil?
        insist { subject["data"]} == nil
      end
    end

    describe "parse xml and store values with xpath#{suffix}" do
      config <<-CONFIG
      filter {
        xml {
          source => "#{field}"
          target => "data"
          xpath => [ "/foo/key/text()", "xpath_field" ]
        }
      }
      CONFIG

      # Single value
      sample(field => '<foo><key>value</key></foo>') do
        insist { subject["tags"] }.nil?
        insist { subject["xpath_field"]} == ["value"]
      end

      #Multiple values
      sample(field => '<foo><key>value1</key><key>value2</key></foo>') do
        insist { subject["tags"] }.nil?
        insist { subject["xpath_field"]} == ["value1","value2"]
      end
    end

  end

  describe "parse correctly non ascii content with xpath" do
    config <<-CONFIG
    filter {
      xml {
        source => "xmldata"
        target => "data"
        xpath => [ "/foo/key/text()", "xpath_field" ]
      }
    }
    CONFIG

    # Single value
    sample("xmldata" => '<foo><key>Français</key></foo>') do
      insist { subject["tags"] }.nil?
      insist { subject["xpath_field"]} == ["Français"]
    end
  end

end
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-filter-xml
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-10-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: nokogiri
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ description: Takes a field that contains XML and expands it into an actual datastructure.
48
+ email: richard.pijnenburg@elasticsearch.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - Gemfile
55
+ - Rakefile
56
+ - lib/logstash/filters/xml.rb
57
+ - logstash-filter-xml.gemspec
58
+ - rakelib/publish.rake
59
+ - rakelib/vendor.rake
60
+ - spec/filters/xml_spec.rb
61
+ homepage: http://logstash.net/
62
+ licenses:
63
+ - Apache License (2.0)
64
+ metadata:
65
+ logstash_plugin: 'true'
66
+ group: filter
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ! '>='
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ requirements: []
82
+ rubyforge_project:
83
+ rubygems_version: 2.4.1
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: Takes a field that contains XML and expands it into an actual datastructure.
87
+ test_files:
88
+ - spec/filters/xml_spec.rb