logstash-filter-prune 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +4 -0
- data/Gemfile +3 -0
- data/Rakefile +6 -0
- data/lib/logstash/filters/prune.rb +150 -0
- data/logstash-filter-prune.gemspec +26 -0
- data/rakelib/publish.rake +9 -0
- data/rakelib/vendor.rake +169 -0
- data/spec/filters/prune_spec.rb +441 -0
- metadata +76 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
Y2UxMGMyM2U5NTRmMmM4YjFlMDM0NzhlNDY0NWE5NmYxMDQzOTA5Yg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NWUzMTUxY2ZhNjY4YzFmNjJlNzM5YmZkYzQ0ODUzNjcyYmZiY2MwZg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZTVhYjVhOGNiODliMWE4ZGU4MTkzZTU3M2Y5ZmUwMzM1ODI0ZjBhOWFmZGNi
|
10
|
+
MzRmNDU3YWM0ZWIwNWY4Mjg5ZWMyYjAwM2I3NzEzZjc2ZWIyMTljNWYzZjFl
|
11
|
+
OWE4ZDdhMTMzNjg5MWU4ZDc2NjIwNTJiZGJhYmI1NzBkYzc5MWE=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZjJiN2ExYTcyMjIzMzViYWY4OTBlOGVkZGExMWE5OTdkOWNjMzhkNzJhOTA2
|
14
|
+
OTcxNDYyZmY1NDk2ZDAwMjA4MTdmNDFkZDI0MzkxMDgzYjI4N2NjODVlOGZj
|
15
|
+
MWEyMDQ4YWM1Y2Q3MWQ1YjMyZGFjZTFhZmM3MjEwODMzNTZlYTQ=
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/filters/base"
|
3
|
+
require "logstash/namespace"
|
4
|
+
|
5
|
+
|
6
|
+
# The prune filter is for pruning event data from @fields based on whitelist/blacklist
|
7
|
+
# of field names or their values (names and values can also be regular expressions).
|
8
|
+
|
9
|
+
class LogStash::Filters::Prune < LogStash::Filters::Base
  config_name "prune"
  milestone 1

  # Trigger whether configuration fields and values should be interpolated for
  # dynamic values.
  # Probably adds some performance overhead. Defaults to false.
  config :interpolate, :validate => :boolean, :default => false

  # Include fields only if their names match specified regexps, default to empty list which means include everything.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags            => [ "apache-accesslog" ]
  #         whitelist_names => [ "method", "(referrer|status)", "${some}_field" ]
  #       }
  #     }
  config :whitelist_names, :validate => :array, :default => []

  # Exclude fields whose names match specified regexps, by default exclude unresolved %{field} strings.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags            => [ "apache-accesslog" ]
  #         blacklist_names => [ "method", "(referrer|status)", "${some}_field" ]
  #       }
  #     }
  config :blacklist_names, :validate => :array, :default => [ "%\{[^}]+\}" ]

  # Include specified fields only if their values match regexps.
  # In case field values are arrays, the fields are pruned on per array item
  # thus only matching array items will be included.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags             => [ "apache-accesslog" ]
  #         whitelist_values => [ "uripath", "/index.php",
  #                               "method", "(GET|POST)",
  #                               "status", "^[^2]" ]
  #       }
  #     }
  config :whitelist_values, :validate => :hash, :default => {}

  # Exclude specified fields if their values match regexps.
  # In case field values are arrays, the fields are pruned on per array item
  # in case all array items are matched whole field will be deleted.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags             => [ "apache-accesslog" ]
  #         blacklist_values => [ "uripath", "/index.php",
  #                               "method", "(HEAD|OPTIONS)",
  #                               "status", "^[^2]" ]
  #       }
  #     }
  config :blacklist_values, :validate => :hash, :default => {}

  # Precompiles the configured patterns once when interpolation is disabled.
  # With interpolation enabled nothing is compiled here, because the patterns
  # are rebuilt from each event inside #filter.
  public
  def register
    return if @interpolate

    @whitelist_names_regexp = Regexp.union(@whitelist_names.map { |x| Regexp.new(x) })
    @blacklist_names_regexp = Regexp.union(@blacklist_names.map { |x| Regexp.new(x) })
    # The value rules are replaced in place by their compiled Regexp.
    @whitelist_values.each { |key, value| @whitelist_values[key] = Regexp.new(value) }
    @blacklist_values.each { |key, value| @blacklist_values[key] = Regexp.new(value) }
  end # def register

  # Prunes the fields of +event+ according to the four configured rule sets
  # and then calls filter_matched.
  #
  # Removals are collected first and applied only at the end: in interpolation
  # mode a rule may refer to another field, and that field must still be
  # present while the remaining rules are evaluated.
  public
  def filter(event)
    return unless filter?(event)

    hash = event.to_hash
    fields_to_remove = []

    unless @whitelist_names.empty?
      @whitelist_names_regexp = names_regexp(@whitelist_names, event) if @interpolate
      hash.each_key do |field|
        fields_to_remove << field unless field.match(@whitelist_names_regexp)
      end
    end

    unless @blacklist_names.empty?
      @blacklist_names_regexp = names_regexp(@blacklist_names, event) if @interpolate
      hash.each_key do |field|
        fields_to_remove << field if field.match(@blacklist_names_regexp)
      end
    end

    # Whitelist rules drop values that do NOT match; blacklist rules drop
    # values that DO match.
    fields_to_remove.concat(value_removals(event, hash, @whitelist_values, false))
    fields_to_remove.concat(value_removals(event, hash, @blacklist_values, true))

    fields_to_remove.each do |field|
      if field.is_a?(Hash)
        remaining = hash[field[:key]]
        # An earlier entry may already have deleted the whole field; in that
        # case there is nothing left to subtract from.
        hash[field[:key]] = remaining - field[:values] if remaining.is_a?(Array)
      else
        hash.delete(field)
      end
    end

    filter_matched(event)
  end # def filter

  private

  # Builds one regexp matching any of +names+, each interpolated against +event+.
  def names_regexp(names, event)
    Regexp.union(names.map { |x| Regexp.new(event.sprintf(x)) })
  end

  # Evaluates one set of value rules (field name => pattern) against +hash+.
  #
  # remove_on_match - true for blacklist semantics, false for whitelist.
  #
  # Returns a list whose entries are either a field name (remove the whole
  # field) or { :key => name, :values => [...] } (remove only those items
  # from an array-valued field).
  def value_removals(event, hash, rules, remove_on_match)
    removals = []
    rules.each do |key, pattern|
      if @interpolate
        key = event.sprintf(key)
        pattern = Regexp.new(event.sprintf(pattern))
      end

      current = hash[key]
      next if current.nil?

      if current.is_a?(Array)
        doomed = current.select { |item| value_matches?(item, pattern) == remove_on_match }
        next if doomed.empty?
        removals << (doomed.length == current.length ? key : { :key => key, :values => doomed })
      elsif value_matches?(current, pattern) == remove_on_match
        removals << key
      end
    end
    removals
  end

  # True when the string form of +value+ matches +pattern+. Going through
  # to_s lets numeric and boolean field values be tested instead of raising
  # NoMethodError on #match.
  def value_matches?(value, pattern)
    !value.to_s.match(pattern).nil?
  end
end # class LogStash::Filters::Prune
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Gem specification for the logstash-filter-prune plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-filter-prune'
  s.version         = '0.1.0'
  s.licenses        = ['Apache License (2.0)']
  s.summary         = "The prune filter is for pruning event data from fields based on whitelist/blacklist of field names or their values (names and values can also be regular expressions)"
  s.description     = "The prune filter is for pruning event data from fields based on whitelist/blacklist of field names or their values (names and values can also be regular expressions)"
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths = ["lib"]

  # Files
  # NUL-separated output is split on NUL so that tracked file names containing
  # whitespace stay intact. Splitting on $\ (nil by default) would split on
  # any whitespace.
  s.files = `git ls-files -z`.split("\x0")

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

end
|
26
|
+
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require "gem_publisher"
|
2
|
+
|
3
|
+
desc "Publish gem to RubyGems.org"
|
4
|
+
# Looks for a *.gemspec one directory above this rake file and hands the first
# match to GemPublisher; prints the result when GemPublisher returns one.
task :publish_gem do |_t|
  pattern = File.expand_path('../*.gemspec',File.dirname(__FILE__))
  gemspec_path = Dir.glob(pattern).first
  published = GemPublisher.publish_if_updated(gemspec_path, :rubygems)
  puts "Published #{published}" if published
end
|
9
|
+
|
data/rakelib/vendor.rake
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
require "net/http"
|
2
|
+
require "uri"
|
3
|
+
require "digest/sha1"
|
4
|
+
|
5
|
+
# Builds a path below the "vendor" directory.
#
# args - zero or more path components.
#
# Returns the components joined onto "vendor" ("vendor" alone when no
# components are given).
def vendor(*args)
  File.join("vendor", *args)
end
|
8
|
+
|
9
|
+
# Directory task for "vendor/" (declared with a prerequisite on "vendor");
# its action creates the directory named by the task.
directory("vendor/" => ["vendor"]) { |dir_task, _args| mkdir dir_task.name }
|
12
|
+
|
13
|
+
# Downloads +url+ to +output+ and verifies the checksum of what was received.
#
# url    - address to download.
# sha1   - expected SHA1 hex digest.
# output - destination path.
#
# Fails with a RuntimeError when the digest reported by download differs
# from +sha1+.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  computed = download(url, output)
  return if computed == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{computed}')"
end # def fetch
|
22
|
+
|
23
|
+
# Defines and immediately invokes a task that makes sure the file behind
# +url+ is present under vendor/ with the expected checksum.
#
# url  - address of the file; its basename becomes the local file name.
# sha1 - expected SHA1 hex digest.
#
# Returns the local path ("vendor/<basename>").
def file_fetch(url, sha1)
  target = "vendor/#{File.basename( URI(url).path )}"

  fetcher = task(target => [ "vendor/" ]) do
    begin
      # Re-download only when the existing file has the wrong checksum.
      fetch(url, sha1, target) unless file_sha1(target) == sha1
    rescue Errno::ENOENT
      # Nothing on disk yet: download it.
      fetch(url, sha1, target)
    end
  end
  fetcher.invoke

  target
end
|
39
|
+
|
40
|
+
# Computes the SHA1 hex digest of the file at +path+.
#
# Digest::SHA1.file reads the file in binary mode and closes it when done,
# so the digest does not depend on platform newline translation.
#
# Raises Errno::ENOENT when the file does not exist (file_fetch relies on it).
def file_sha1(path)
  Digest::SHA1.file(path).hexdigest
end
|
54
|
+
|
55
|
+
# Streams +url+ into +output+ and returns the SHA1 hex digest of the bytes
# received.
#
# The body is written to "<output>.tmp" first and renamed onto +output+ only
# after the transfer finished, so a failed download never leaves a partial
# file at +output+.
#
# url    - http or https address to fetch.
# output - destination path.
#
# Fails with a RuntimeError when the response status is neither 200 nor 301.
# Re-raises SocketError after printing a message.
def download(url, output)
  uri = URI(url)
  digest = Digest::SHA1.new
  tmp = "#{output}.tmp"
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string and is never empty, unlike uri.path.
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # response.code is a String, so it must be compared with strings.
      unless %w[200 301].include?(response.code)
        fail "HTTP fetch failed for #{url}. #{response}"
      end

      size = response["content-length"].to_i # 0 when the header is absent
      count = 0
      show_progress = size > 0 && $stdout.tty?
      # Binary mode: the payload must reach the disk byte for byte.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if show_progress
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count.to_f / size * 100))
          end
        end
      end
      $stdout.write("\r \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  # tmp is nil when URI(url) itself raised.
  File.unlink(tmp) if tmp && File.exist?(tmp)
end # def download
|
88
|
+
|
89
|
+
# Extracts entries from the gzipped tarball at +tarball+.
#
# The block is called with every archive entry and returns the destination
# path for it, or nil to skip the entry. An existing file whose size and
# permission bits already equal the entry's is left untouched. After a
# successful run the tarball itself is deleted.
#
# tarball - path of the .tar.gz file.
# block   - maps an entry to its output path (or nil).
def untar(tarball, &block)
  require "archive/tar/minitar"
  # Binary mode: the archive must be read byte for byte.
  tgz = Zlib::GzipReader.new(File.open(tarball, "rb"))
  begin
    tar = Archive::Tar::Minitar::Input.open(tgz)
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)

      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
        next
      end

      # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
      # expose headers in the entry.
      entry_mode = entry.instance_eval { @mode } & 0777
      if File.exist?(path)
        stat = File.stat(path)
        entry_size = entry.instance_eval { @size }
        # If file size and mode are the same, skip writing.
        next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
      end

      puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
      File.open(path, "wb") do |fd|
        # eof? check lets us skip empty files. Necessary because the API provided by
        # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
        # IO object. Something about empty files in this EntryStream causes
        # IO.copy_stream to throw "can't convert nil into String" on JRuby
        # TODO(sissel): File a bug about this.
        until entry.eof?
          fd.write(entry.read(16384))
        end
      end
      File.chmod(entry_mode, path)
    end
  ensure
    # Release the handles even when extraction fails part-way.
    tar.close if tar
    tgz.close unless tgz.closed?
  end
  File.unlink(tarball) if File.file?(tarball)
end # def untar
|
133
|
+
|
134
|
+
# Decompresses the gzip file at +file+ next to itself and deletes the
# compressed original.
#
# Only a trailing ".gz" is stripped to form the output name, so a ".gz"
# occurring elsewhere in the path is left alone. If decompression fails, the
# partially written output is removed and the error is re-raised.
#
# file - path of the .gz file.
def ungz(file)
  outpath = file.sub(/\.gz\z/, '')
  # The block form closes the compressed input even when copying fails.
  Zlib::GzipReader.open(file) do |gz|
    begin
      File.open(outpath, "wb") { |out| IO.copy_stream(gz, out) }
    rescue
      File.unlink(outpath) if File.file?(outpath)
      raise
    end
  end
  File.unlink(file)
end
|
149
|
+
|
150
|
+
desc "Process any vendor files required for this plugin"
|
151
|
+
# Fetches every entry of @files and unpacks it according to its extension.
#
# Each entry is read through the keys 'url', 'sha1' and (optionally) 'files'.
# NOTE(review): @files is not set in this rake file; it is presumably
# assigned by the including Rakefile. Confirm.
task "vendor" do
  (@files || []).each do |file|
    archive = file_fetch(file['url'], file['sha1'])
    if archive =~ /\.tar\.gz\z/
      prefix = archive.gsub('.tar.gz', '').gsub('vendor/', '')
      untar(archive) do |entry|
        if file['files'].nil?
          # NOTE(review): no 'files' list. Assumed to mean "extract every
          # entry under vendor/ using its archive path"; previously this case
          # raised a TypeError because no output name was set. Confirm intent.
          out = entry.full_name
        else
          # Only listed files are extracted, flattened to their base name.
          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
          out = entry.full_name.split("/").last
        end
        File.join('vendor', out)
      end
    elsif archive =~ /\.gz\z/
      ungz(archive)
    end
  end
end
|
@@ -0,0 +1,441 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
require "logstash/filters/prune"
|
3
|
+
|
4
|
+
# Currently the prune filter has bugs and I can't really tell what the intended
|
5
|
+
# behavior is.
|
6
|
+
#
|
7
|
+
# See the 'whitelist field values with interpolation' test for a commented
|
8
|
+
# explanation of my confusion.
|
9
|
+
describe LogStash::Filters::Prune, :if => false do
|
10
|
+
|
11
|
+
|
12
|
+
describe "defaults" do
|
13
|
+
|
14
|
+
config <<-CONFIG
|
15
|
+
filter {
|
16
|
+
prune { }
|
17
|
+
}
|
18
|
+
CONFIG
|
19
|
+
|
20
|
+
sample(
|
21
|
+
"firstname" => "Borat",
|
22
|
+
"lastname" => "Sagdiyev",
|
23
|
+
"fullname" => "Borat Sagdiyev",
|
24
|
+
"country" => "Kazakhstan",
|
25
|
+
"location" => "Somethere in Kazakhstan",
|
26
|
+
"hobby" => "Cloud",
|
27
|
+
"status" => "200",
|
28
|
+
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
|
29
|
+
"%{hmm}" => "doh"
|
30
|
+
) do
|
31
|
+
insist { subject["firstname"] } == "Borat"
|
32
|
+
insist { subject["lastname"] } == "Sagdiyev"
|
33
|
+
insist { subject["fullname"] } == "Borat Sagdiyev"
|
34
|
+
insist { subject["country"] } == "Kazakhstan"
|
35
|
+
insist { subject["location"] } == "Somethere in Kazakhstan"
|
36
|
+
insist { subject["hobby"] } == "Cloud"
|
37
|
+
insist { subject["status"] } == "200"
|
38
|
+
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
|
39
|
+
insist { subject["%{hmm}"] } == nil
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe "whitelist field names" do
|
44
|
+
|
45
|
+
config <<-CONFIG
|
46
|
+
filter {
|
47
|
+
prune {
|
48
|
+
whitelist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
|
49
|
+
}
|
50
|
+
}
|
51
|
+
CONFIG
|
52
|
+
|
53
|
+
sample(
|
54
|
+
"firstname" => "Borat",
|
55
|
+
"lastname" => "Sagdiyev",
|
56
|
+
"fullname" => "Borat Sagdiyev",
|
57
|
+
"country" => "Kazakhstan",
|
58
|
+
"location" => "Somethere in Kazakhstan",
|
59
|
+
"hobby" => "Cloud",
|
60
|
+
"status" => "200",
|
61
|
+
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
|
62
|
+
"%{hmm}" => "doh"
|
63
|
+
) do
|
64
|
+
insist { subject["firstname"] } == "Borat"
|
65
|
+
insist { subject["lastname"] } == nil
|
66
|
+
insist { subject["fullname"] } == nil
|
67
|
+
insist { subject["country"] } == nil
|
68
|
+
insist { subject["location"] } == nil
|
69
|
+
insist { subject["hobby"] } == "Cloud"
|
70
|
+
insist { subject["status"] } == "200"
|
71
|
+
insist { subject["Borat_saying"] } == nil
|
72
|
+
insist { subject["%{hmm}"] } == nil
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
describe "whitelist field names with interpolation" do
|
77
|
+
|
78
|
+
config <<-CONFIG
|
79
|
+
filter {
|
80
|
+
prune {
|
81
|
+
whitelist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
|
82
|
+
interpolate => true
|
83
|
+
}
|
84
|
+
}
|
85
|
+
CONFIG
|
86
|
+
|
87
|
+
sample(
|
88
|
+
"firstname" => "Borat",
|
89
|
+
"lastname" => "Sagdiyev",
|
90
|
+
"fullname" => "Borat Sagdiyev",
|
91
|
+
"country" => "Kazakhstan",
|
92
|
+
"location" => "Somethere in Kazakhstan",
|
93
|
+
"hobby" => "Cloud",
|
94
|
+
"status" => "200",
|
95
|
+
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
|
96
|
+
"%{hmm}" => "doh"
|
97
|
+
) do
|
98
|
+
insist { subject["firstname"] } == "Borat"
|
99
|
+
insist { subject["lastname"] } == nil
|
100
|
+
insist { subject["fullname"] } == nil
|
101
|
+
insist { subject["country"] } == nil
|
102
|
+
insist { subject["location"] } == nil
|
103
|
+
insist { subject["hobby"] } == "Cloud"
|
104
|
+
insist { subject["status"] } == "200"
|
105
|
+
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
|
106
|
+
insist { subject["%{hmm}"] } == nil
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
describe "blacklist field names" do
|
111
|
+
|
112
|
+
config <<-CONFIG
|
113
|
+
filter {
|
114
|
+
prune {
|
115
|
+
blacklist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
|
116
|
+
}
|
117
|
+
}
|
118
|
+
CONFIG
|
119
|
+
|
120
|
+
sample(
|
121
|
+
"firstname" => "Borat",
|
122
|
+
"lastname" => "Sagdiyev",
|
123
|
+
"fullname" => "Borat Sagdiyev",
|
124
|
+
"country" => "Kazakhstan",
|
125
|
+
"location" => "Somethere in Kazakhstan",
|
126
|
+
"hobby" => "Cloud",
|
127
|
+
"status" => "200",
|
128
|
+
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
|
129
|
+
"%{hmm}" => "doh"
|
130
|
+
) do
|
131
|
+
insist { subject["firstname"] } == nil
|
132
|
+
insist { subject["lastname"] } == "Sagdiyev"
|
133
|
+
insist { subject["fullname"] } == "Borat Sagdiyev"
|
134
|
+
insist { subject["country"] } == "Kazakhstan"
|
135
|
+
insist { subject["location"] } == "Somethere in Kazakhstan"
|
136
|
+
insist { subject["hobby"] } == nil
|
137
|
+
insist { subject["status"] } == nil
|
138
|
+
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
|
139
|
+
insist { subject["%{hmm}"] } == "doh"
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
describe "blacklist field names with interpolation" do
|
144
|
+
|
145
|
+
config <<-CONFIG
|
146
|
+
filter {
|
147
|
+
prune {
|
148
|
+
blacklist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
|
149
|
+
interpolate => true
|
150
|
+
}
|
151
|
+
}
|
152
|
+
CONFIG
|
153
|
+
|
154
|
+
sample(
|
155
|
+
"firstname" => "Borat",
|
156
|
+
"lastname" => "Sagdiyev",
|
157
|
+
"fullname" => "Borat Sagdiyev",
|
158
|
+
"country" => "Kazakhstan",
|
159
|
+
"location" => "Somethere in Kazakhstan",
|
160
|
+
"hobby" => "Cloud",
|
161
|
+
"status" => "200",
|
162
|
+
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
|
163
|
+
"%{hmm}" => "doh"
|
164
|
+
) do
|
165
|
+
insist { subject["firstname"] } == nil
|
166
|
+
insist { subject["lastname"] } == "Sagdiyev"
|
167
|
+
insist { subject["fullname"] } == "Borat Sagdiyev"
|
168
|
+
insist { subject["country"] } == "Kazakhstan"
|
169
|
+
insist { subject["location"] } == "Somethere in Kazakhstan"
|
170
|
+
insist { subject["hobby"] } == nil
|
171
|
+
insist { subject["status"] } == nil
|
172
|
+
insist { subject["Borat_saying"] } == nil
|
173
|
+
insist { subject["%{hmm}"] } == "doh"
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
describe "whitelist field values" do
|
178
|
+
|
179
|
+
config <<-CONFIG
|
180
|
+
filter {
|
181
|
+
prune {
|
182
|
+
# This should only permit fields named 'firstname', 'fullname',
|
183
|
+
# 'location', 'status', etc.
|
184
|
+
whitelist_values => [ "firstname", "^Borat$",
|
185
|
+
"fullname", "%{firstname} Sagdiyev",
|
186
|
+
"location", "no no no",
|
187
|
+
"status", "^2",
|
188
|
+
"%{firstname}_saying", "%{hobby}.*Active" ]
|
189
|
+
}
|
190
|
+
}
|
191
|
+
CONFIG
|
192
|
+
|
193
|
+
sample(
|
194
|
+
"firstname" => "Borat",
|
195
|
+
"lastname" => "Sagdiyev",
|
196
|
+
"fullname" => "Borat Sagdiyev",
|
197
|
+
"country" => "Kazakhstan",
|
198
|
+
"location" => "Somethere in Kazakhstan",
|
199
|
+
"hobby" => "Cloud",
|
200
|
+
"status" => "200",
|
201
|
+
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
|
202
|
+
"%{hmm}" => "doh"
|
203
|
+
) do
|
204
|
+
insist { subject["firstname"] } == "Borat"
|
205
|
+
|
206
|
+
# TODO(sissel): According to the config above, this should be nil because
|
207
|
+
# it is not in the list of whitelisted fields, but we expect it to be
|
208
|
+
# "Sagdiyev" ? I am confused.
|
209
|
+
insist { subject["lastname"] } == "Sagdiyev"
|
210
|
+
insist { subject["fullname"] } == nil
|
211
|
+
insist { subject["country"] } == "Kazakhstan"
|
212
|
+
insist { subject["location"] } == nil
|
213
|
+
insist { subject["hobby"] } == "Cloud"
|
214
|
+
insist { subject["status"] } == "200"
|
215
|
+
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
|
216
|
+
|
217
|
+
# TODO(sissel): Contrary to the 'lastname' check, we expect %{hmm} field
|
218
|
+
# to be nil because it is not whitelisted, yes? Contradictory insists
|
219
|
+
# here. I don't know what the intended behavior is... Seems like
|
220
|
+
# whitelist means 'anything not here' but since this test is written
|
221
|
+
# confusingly, I don't know how to move forward.
|
222
|
+
insist { subject["%{hmm}"] } == nil
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
describe "whitelist field values with interpolation" do
|
227
|
+
|
228
|
+
config <<-CONFIG
|
229
|
+
filter {
|
230
|
+
prune {
|
231
|
+
whitelist_values => [ "firstname", "^Borat$",
|
232
|
+
"fullname", "%{firstname} Sagdiyev",
|
233
|
+
"location", "no no no",
|
234
|
+
"status", "^2",
|
235
|
+
"%{firstname}_saying", "%{hobby}.*Active" ]
|
236
|
+
interpolate => true
|
237
|
+
}
|
238
|
+
}
|
239
|
+
CONFIG
|
240
|
+
|
241
|
+
sample(
|
242
|
+
"firstname" => "Borat",
|
243
|
+
"lastname" => "Sagdiyev",
|
244
|
+
"fullname" => "Borat Sagdiyev",
|
245
|
+
"country" => "Kazakhstan",
|
246
|
+
"location" => "Somethere in Kazakhstan",
|
247
|
+
"hobby" => "Cloud",
|
248
|
+
"status" => "200",
|
249
|
+
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
|
250
|
+
"%{hmm}" => "doh"
|
251
|
+
) do
|
252
|
+
insist { subject["firstname"] } == "Borat"
|
253
|
+
insist { subject["lastname"] } == "Sagdiyev"
|
254
|
+
insist { subject["fullname"] } == "Borat Sagdiyev"
|
255
|
+
insist { subject["country"] } == "Kazakhstan"
|
256
|
+
insist { subject["location"] } == nil
|
257
|
+
insist { subject["hobby"] } == "Cloud"
|
258
|
+
insist { subject["status"] } == "200"
|
259
|
+
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
|
260
|
+
insist { subject["%{hmm}"] } == nil
|
261
|
+
end
|
262
|
+
end
|
263
|
+
|
264
|
+
describe "blacklist field values" do
|
265
|
+
|
266
|
+
config <<-CONFIG
|
267
|
+
filter {
|
268
|
+
prune {
|
269
|
+
blacklist_values => [ "firstname", "^Borat$",
|
270
|
+
"fullname", "%{firstname} Sagdiyev",
|
271
|
+
"location", "no no no",
|
272
|
+
"status", "^2",
|
273
|
+
"%{firstname}_saying", "%{hobby}.*Active" ]
|
274
|
+
}
|
275
|
+
}
|
276
|
+
CONFIG
|
277
|
+
|
278
|
+
sample(
|
279
|
+
"firstname" => "Borat",
|
280
|
+
"lastname" => "Sagdiyev",
|
281
|
+
"fullname" => "Borat Sagdiyev",
|
282
|
+
"country" => "Kazakhstan",
|
283
|
+
"location" => "Somethere in Kazakhstan",
|
284
|
+
"hobby" => "Cloud",
|
285
|
+
"status" => "200",
|
286
|
+
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
|
287
|
+
"%{hmm}" => "doh"
|
288
|
+
) do
|
289
|
+
insist { subject["firstname"] } == nil
|
290
|
+
insist { subject["lastname"] } == "Sagdiyev"
|
291
|
+
insist { subject["fullname"] } == "Borat Sagdiyev"
|
292
|
+
insist { subject["country"] } == "Kazakhstan"
|
293
|
+
insist { subject["location"] } == "Somethere in Kazakhstan"
|
294
|
+
insist { subject["hobby"] } == "Cloud"
|
295
|
+
insist { subject["status"] } == nil
|
296
|
+
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
|
297
|
+
insist { subject["%{hmm}"] } == nil
|
298
|
+
end
|
299
|
+
end
|
300
|
+
|
301
|
+
describe "blacklist field values with interpolation" do
|
302
|
+
|
303
|
+
config <<-CONFIG
|
304
|
+
filter {
|
305
|
+
prune {
|
306
|
+
blacklist_values => [ "firstname", "^Borat$",
|
307
|
+
"fullname", "%{firstname} Sagdiyev",
|
308
|
+
"location", "no no no",
|
309
|
+
"status", "^2",
|
310
|
+
"%{firstname}_saying", "%{hobby}.*Active" ]
|
311
|
+
interpolate => true
|
312
|
+
}
|
313
|
+
}
|
314
|
+
CONFIG
|
315
|
+
|
316
|
+
sample(
|
317
|
+
"firstname" => "Borat",
|
318
|
+
"lastname" => "Sagdiyev",
|
319
|
+
"fullname" => "Borat Sagdiyev",
|
320
|
+
"country" => "Kazakhstan",
|
321
|
+
"location" => "Somethere in Kazakhstan",
|
322
|
+
"hobby" => "Cloud",
|
323
|
+
"status" => "200",
|
324
|
+
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
|
325
|
+
"%{hmm}" => "doh"
|
326
|
+
) do
|
327
|
+
insist { subject["firstname"] } == nil
|
328
|
+
insist { subject["lastname"] } == "Sagdiyev"
|
329
|
+
insist { subject["fullname"] } == nil
|
330
|
+
insist { subject["country"] } == "Kazakhstan"
|
331
|
+
insist { subject["location"] } == "Somethere in Kazakhstan"
|
332
|
+
insist { subject["hobby"] } == "Cloud"
|
333
|
+
insist { subject["status"] } == nil
|
334
|
+
insist { subject["Borat_saying"] } == nil
|
335
|
+
insist { subject["%{hmm}"] } == nil
|
336
|
+
end
|
337
|
+
end
|
338
|
+
|
339
|
+
describe "whitelist field values on fields witn array values" do
|
340
|
+
|
341
|
+
config <<-CONFIG
|
342
|
+
filter {
|
343
|
+
prune {
|
344
|
+
whitelist_values => [ "status", "^(1|2|3)",
|
345
|
+
"xxx", "3",
|
346
|
+
"error", "%{blah}" ]
|
347
|
+
}
|
348
|
+
}
|
349
|
+
CONFIG
|
350
|
+
|
351
|
+
sample(
|
352
|
+
"blah" => "foo",
|
353
|
+
"xxx" => [ "1 2 3", "3 4 5" ],
|
354
|
+
"status" => [ "100", "200", "300", "400", "500" ],
|
355
|
+
"error" => [ "This is foolish" , "Need smthing smart too" ]
|
356
|
+
) do
|
357
|
+
insist { subject["blah"] } == "foo"
|
358
|
+
insist { subject["error"] } == nil
|
359
|
+
insist { subject["xxx"] } == [ "1 2 3", "3 4 5" ]
|
360
|
+
insist { subject["status"] } == [ "100", "200", "300" ]
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
describe "blacklist field values on fields witn array values" do
|
365
|
+
|
366
|
+
config <<-CONFIG
|
367
|
+
filter {
|
368
|
+
prune {
|
369
|
+
blacklist_values => [ "status", "^(1|2|3)",
|
370
|
+
"xxx", "3",
|
371
|
+
"error", "%{blah}" ]
|
372
|
+
}
|
373
|
+
}
|
374
|
+
CONFIG
|
375
|
+
|
376
|
+
sample(
|
377
|
+
"blah" => "foo",
|
378
|
+
"xxx" => [ "1 2 3", "3 4 5" ],
|
379
|
+
"status" => [ "100", "200", "300", "400", "500" ],
|
380
|
+
"error" => [ "This is foolish", "Need smthing smart too" ]
|
381
|
+
) do
|
382
|
+
insist { subject["blah"] } == "foo"
|
383
|
+
insist { subject["error"] } == [ "This is foolish", "Need smthing smart too" ]
|
384
|
+
insist { subject["xxx"] } == nil
|
385
|
+
insist { subject["status"] } == [ "400", "500" ]
|
386
|
+
end
|
387
|
+
end
|
388
|
+
|
389
|
+
describe "whitelist field values with interpolation on fields witn array values" do
|
390
|
+
|
391
|
+
config <<-CONFIG
|
392
|
+
filter {
|
393
|
+
prune {
|
394
|
+
whitelist_values => [ "status", "^(1|2|3)",
|
395
|
+
"xxx", "3",
|
396
|
+
"error", "%{blah}" ]
|
397
|
+
interpolate => true
|
398
|
+
}
|
399
|
+
}
|
400
|
+
CONFIG
|
401
|
+
|
402
|
+
sample(
|
403
|
+
"blah" => "foo",
|
404
|
+
"xxx" => [ "1 2 3", "3 4 5" ],
|
405
|
+
"status" => [ "100", "200", "300", "400", "500" ],
|
406
|
+
"error" => [ "This is foolish" , "Need smthing smart too" ]
|
407
|
+
) do
|
408
|
+
insist { subject["blah"] } == "foo"
|
409
|
+
insist { subject["error"] } == [ "This is foolish" ]
|
410
|
+
insist { subject["xxx"] } == [ "1 2 3", "3 4 5" ]
|
411
|
+
insist { subject["status"] } == [ "100", "200", "300" ]
|
412
|
+
end
|
413
|
+
end
|
414
|
+
|
415
|
+
describe "blacklist field values with interpolation on fields witn array values" do
|
416
|
+
|
417
|
+
config <<-CONFIG
|
418
|
+
filter {
|
419
|
+
prune {
|
420
|
+
blacklist_values => [ "status", "^(1|2|3)",
|
421
|
+
"xxx", "3",
|
422
|
+
"error", "%{blah}" ]
|
423
|
+
interpolate => true
|
424
|
+
}
|
425
|
+
}
|
426
|
+
CONFIG
|
427
|
+
|
428
|
+
sample(
|
429
|
+
"blah" => "foo",
|
430
|
+
"xxx" => [ "1 2 3", "3 4 5" ],
|
431
|
+
"status" => [ "100", "200", "300", "400", "500" ],
|
432
|
+
"error" => [ "This is foolish" , "Need smthing smart too" ]
|
433
|
+
) do
|
434
|
+
insist { subject["blah"] } == "foo"
|
435
|
+
insist { subject["error"] } == [ "Need smthing smart too" ]
|
436
|
+
insist { subject["xxx"] } == nil
|
437
|
+
insist { subject["status"] } == [ "400", "500" ]
|
438
|
+
end
|
439
|
+
end
|
440
|
+
|
441
|
+
end
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: logstash-filter-prune
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Elasticsearch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: logstash
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.4.0
|
20
|
+
- - <
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.0
|
30
|
+
- - <
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.0.0
|
33
|
+
description: The prune filter is for pruning event data from fields based on whitelist/blacklist
|
34
|
+
of field names or their values (names and values can also be regular expressions)
|
35
|
+
email: richard.pijnenburg@elasticsearch.com
|
36
|
+
executables: []
|
37
|
+
extensions: []
|
38
|
+
extra_rdoc_files: []
|
39
|
+
files:
|
40
|
+
- .gitignore
|
41
|
+
- Gemfile
|
42
|
+
- Rakefile
|
43
|
+
- lib/logstash/filters/prune.rb
|
44
|
+
- logstash-filter-prune.gemspec
|
45
|
+
- rakelib/publish.rake
|
46
|
+
- rakelib/vendor.rake
|
47
|
+
- spec/filters/prune_spec.rb
|
48
|
+
homepage: http://logstash.net/
|
49
|
+
licenses:
|
50
|
+
- Apache License (2.0)
|
51
|
+
metadata:
|
52
|
+
logstash_plugin: 'true'
|
53
|
+
group: filter
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ! '>='
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ! '>='
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
requirements: []
|
69
|
+
rubyforge_project:
|
70
|
+
rubygems_version: 2.4.1
|
71
|
+
signing_key:
|
72
|
+
specification_version: 4
|
73
|
+
summary: The prune filter is for pruning event data from fields based on whitelist/blacklist
|
74
|
+
of field names or their values (names and values can also be regular expressions)
|
75
|
+
test_files:
|
76
|
+
- spec/filters/prune_spec.rb
|