logstash-filter-prune 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ Y2UxMGMyM2U5NTRmMmM4YjFlMDM0NzhlNDY0NWE5NmYxMDQzOTA5Yg==
5
+ data.tar.gz: !binary |-
6
+ NWUzMTUxY2ZhNjY4YzFmNjJlNzM5YmZkYzQ0ODUzNjcyYmZiY2MwZg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZTVhYjVhOGNiODliMWE4ZGU4MTkzZTU3M2Y5ZmUwMzM1ODI0ZjBhOWFmZGNi
10
+ MzRmNDU3YWM0ZWIwNWY4Mjg5ZWMyYjAwM2I3NzEzZjc2ZWIyMTljNWYzZjFl
11
+ OWE4ZDdhMTMzNjg5MWU4ZDc2NjIwNTJiZGJhYmI1NzBkYzc5MWE=
12
+ data.tar.gz: !binary |-
13
+ ZjJiN2ExYTcyMjIzMzViYWY4OTBlOGVkZGExMWE5OTdkOWNjMzhkNzJhOTA2
14
+ OTcxNDYyZmY1NDk2ZDAwMjA4MTdmNDFkZDI0MzkxMDgzYjI4N2NjODVlOGZj
15
+ MWEyMDQ4YWM1Y2Q3MWQ1YjMyZGFjZTFhZmM3MjEwODMzNTZlYTQ=
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over HTTPS rather than plain HTTP so the gem index and downloads
# are transported over an authenticated, encrypted connection.
source 'https://rubygems.org'

# Build and release tooling.
gem 'rake'
gem 'gem_publisher'
@@ -0,0 +1,6 @@
1
# List of vendor file descriptors; empty here.
@files = []

# Default task: shell out to rake to print the task list.
task(:default) { system("rake -T") }
6
+
@@ -0,0 +1,150 @@
1
+ # encoding: utf-8
2
+ require "logstash/filters/base"
3
+ require "logstash/namespace"
4
+
5
+
6
+ # The prune filter is for pruning event data from @fields based on whitelist/blacklist
7
+ # of field names or their values (names and values can also be regular expressions).
8
+
9
class LogStash::Filters::Prune < LogStash::Filters::Base
  config_name "prune"
  milestone 1

  # Trigger whether configuration fields and values should be interpolated for
  # dynamic values.
  # Probably adds some performance overhead. Defaults to false.
  config :interpolate, :validate => :boolean, :default => false

  # Include fields only if their names match specified regexps, default to empty list which means include everything.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags            => [ "apache-accesslog" ]
  #         whitelist_names => [ "method", "(referrer|status)", "%{some}_field" ]
  #       }
  #     }
  config :whitelist_names, :validate => :array, :default => []

  # Exclude fields whose names match specified regexps, by default exclude unresolved %{field} strings.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags            => [ "apache-accesslog" ]
  #         blacklist_names => [ "method", "(referrer|status)", "%{some}_field" ]
  #       }
  #     }
  config :blacklist_names, :validate => :array, :default => [ "%\{[^}]+\}" ]

  # Include specified fields only if their values match regexps.
  # In case field values are arrays, the fields are pruned on per array item
  # thus only matching array items will be included.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags             => [ "apache-accesslog" ]
  #         whitelist_values => [ "uripath", "/index.php",
  #                               "method", "(GET|POST)",
  #                               "status", "^[^2]" ]
  #       }
  #     }
  config :whitelist_values, :validate => :hash, :default => {}

  # Exclude specified fields if their values match regexps.
  # In case field values are arrays, the fields are pruned on per array item
  # in case all array items are matched whole field will be deleted.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags             => [ "apache-accesslog" ]
  #         blacklist_values => [ "uripath", "/index.php",
  #                               "method", "(HEAD|OPTIONS)",
  #                               "status", "^[^2]" ]
  #       }
  #     }
  config :blacklist_values, :validate => :hash, :default => {}

  # Pre-compiles the configured patterns when interpolation is disabled.
  # With interpolation enabled the patterns depend on each event, so they are
  # compiled per event inside #filter instead.
  public
  def register
    return if @interpolate

    @whitelist_names_regexp = Regexp.union(@whitelist_names.map { |x| Regexp.new(x) })
    @blacklist_names_regexp = Regexp.union(@blacklist_names.map { |x| Regexp.new(x) })
    # Only existing keys are reassigned, so updating the hashes while
    # iterating over them is safe.
    @whitelist_values.each do |key, value|
      @whitelist_values[key] = Regexp.new(value)
    end
    @blacklist_values.each do |key, value|
      @blacklist_values[key] = Regexp.new(value)
    end
  end # def register

  # Prunes fields (or individual array items) from the event's hash according
  # to the configured name and value white/blacklists.
  public
  def filter(event)
    return unless filter?(event)

    hash = event.to_hash

    # Collect everything that has to be removed first and only delete at the
    # very end. In interpolation mode, deleting early could give unexpected
    # results: patterns with dynamic values would no longer match because the
    # fields they refer to have already been removed.
    fields_to_remove = []

    unless @whitelist_names.empty?
      # Per-event patterns stay in a local so the instance state is not
      # rewritten for every event.
      whitelist = @interpolate ? names_regexp(@whitelist_names, event) : @whitelist_names_regexp
      hash.each_key do |field|
        fields_to_remove << field unless field.match(whitelist)
      end
    end

    unless @blacklist_names.empty?
      blacklist = @interpolate ? names_regexp(@blacklist_names, event) : @blacklist_names_regexp
      hash.each_key do |field|
        fields_to_remove << field if field.match(blacklist)
      end
    end

    # Whitelisted values: remove what does NOT match.
    fields_to_remove.concat(collect_value_removals(event, hash, @whitelist_values, false))
    # Blacklisted values: remove what DOES match.
    fields_to_remove.concat(collect_value_removals(event, hash, @blacklist_values, true))

    fields_to_remove.each do |field|
      if field.is_a?(Hash)
        # Partial removal of array items. The whole field may already have
        # been deleted by another rule, in which case there is nothing left
        # to subtract from.
        remaining = hash[field[:key]]
        hash[field[:key]] = remaining - field[:values] if remaining.is_a?(Array)
      else
        hash.delete(field)
      end
    end

    filter_matched(event)
  end # def filter

  private

  # Builds the union regexp for a list of name patterns after interpolating
  # each pattern against the event.
  def names_regexp(patterns, event)
    Regexp.union(patterns.map { |x| Regexp.new(event.sprintf(x)) })
  end

  # Computes the removals requested by one field => pattern table.
  #
  # rules           - the whitelist_values or blacklist_values hash.
  # remove_on_match - true to remove matching values (blacklist), false to
  #                   remove non-matching values (whitelist).
  #
  # Returns an array whose items are either a field name (remove the whole
  # field) or { :key => name, :values => [...] } (remove only those array
  # items). Values are matched on their string form so that non-String
  # values do not raise NoMethodError.
  def collect_value_removals(event, hash, rules, remove_on_match)
    removals = []
    rules.each do |key, value|
      if @interpolate
        key = event.sprintf(key)
        value = Regexp.new(event.sprintf(value))
      end

      current = hash[key]
      next unless current

      matches = lambda { |item| !value.match(item.to_s).nil? }

      if current.is_a?(Array)
        doomed = current.select { |item| matches.call(item) == remove_on_match }
        next if doomed.empty?
        # When every item goes, drop the field itself instead of leaving an
        # empty array behind.
        removals << (doomed.length == current.length ? key : { :key => key, :values => doomed })
      elsif matches.call(current) == remove_on_match
        removals << key
      end
    end
    removals
  end
end # class LogStash::Filters::Prune
@@ -0,0 +1,26 @@
1
# Gem specification for the logstash prune filter plugin.
Gem::Specification.new do |spec|
  # The summary and the description use the same text.
  blurb = "The prune filter is for pruning event data from fields based on whitelist/blacklist of field names or their values (names and values can also be regular expressions)"

  spec.name          = 'logstash-filter-prune'
  spec.version       = '0.1.0'
  spec.licenses      = ['Apache License (2.0)']
  spec.summary       = blurb
  spec.description   = blurb
  spec.authors       = ["Elasticsearch"]
  spec.email         = 'richard.pijnenburg@elasticsearch.com'
  spec.homepage      = "http://logstash.net/"
  spec.require_paths = ["lib"]

  # Package every file tracked by git; test files are the subset living
  # under test/, spec/ or features/.
  spec.files      = `git ls-files`.split($\)
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  spec.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Gem dependencies
  spec.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
end
26
+
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
desc "Publish gem to RubyGems.org"
task :publish_gem do |_task|
  # Take the first gemspec found in the directory above this file's directory.
  pattern = File.expand_path('../*.gemspec', File.dirname(__FILE__))
  gemspec_path = Dir.glob(pattern).first

  published = GemPublisher.publish_if_updated(gemspec_path, :rubygems)
  puts "Published #{published}" if published
end
9
+
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
# Joins the given path components underneath the "vendor" directory.
def vendor(*args)
  File.join(["vendor"] + args)
end
8
+
9
# Directory rule for "vendor/": depends on the "vendor" task and calls mkdir
# with the rule's own name.
directory("vendor/" => ["vendor"]) { |rule, _args| mkdir rule.name }
12
+
13
# Downloads url to output and fails unless the SHA1 of the downloaded data
# equals the expected sha1.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  received = download(url, output)
  return if received == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{received}')"
end # def fetch
22
+
23
# Makes sure vendor/<basename of url> exists with the expected sha1,
# downloading it when it is missing or its checksum differs.
# Returns the path of the vendored file.
def file_fetch(url, sha1)
  basename = File.basename(URI(url).path)
  target = "vendor/#{basename}"

  refresh = task(target => ["vendor/"]) do
    begin
      fetch(url, sha1, target) unless file_sha1(target) == sha1
    rescue Errno::ENOENT
      # No such file (for example nothing downloaded yet): fetch it.
      fetch(url, sha1, target)
    end
  end
  refresh.invoke

  target
end
39
+
40
# Returns the hex-encoded SHA1 digest of the file at path.
# Raises the underlying Errno error (e.g. Errno::ENOENT) if the file cannot
# be opened.
def file_sha1(path)
  Digest::SHA1.file(path).hexdigest
end
54
+
55
# Downloads url into output and returns the hex SHA1 digest of the bytes
# received.
#
# The body is streamed into "#{output}.tmp" and renamed to output only after
# the transfer finished, so a partial download never replaces output.
#
# Raises RuntimeError when the server answers with a status other than 200 or
# 301. SocketError is logged and re-raised.
def download(url, output)
  # Assigned first so the ensure clause can always refer to it.
  tmp = "#{output}.tmp"
  uri = URI(url)
  digest = Digest::SHA1.new
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string, which uri.path would drop.
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # response.code is a String, so compare against strings, and fail when
      # the status is NOT one of the accepted ones.
      fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
      # A missing content-length header yields 0.0, which disables the
      # progress display below.
      size = response["content-length"].to_i.to_f
      count = 0
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  # Remove the partial file when the transfer did not complete.
  File.unlink(tmp) if File.exist?(tmp)
end # def download
88
+
89
# Extracts entries from the gzipped tarball.
#
# The block is called with every archive entry and must return the path the
# entry should be written to, or nil to skip that entry. Files whose size and
# permission bits already match the entry are left untouched. The tarball is
# deleted once all entries were processed.
def untar(tarball, &block)
  require "zlib"
  require "archive/tar/minitar"
  tgz = Zlib::GzipReader.new(File.open(tarball, "rb"))
  tar = Archive::Tar::Minitar::Input.open(tgz)
  begin
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)

      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
      else
        entry_mode = entry.instance_eval { @mode } & 0777
        # File.exist? instead of File.exists?, which was removed in Ruby 3.2.
        if File.exist?(path)
          stat = File.stat(path)
          # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
          # expose headers in the entry.
          entry_size = entry.instance_eval { @size }
          # If file size and mode are the same, skip writing.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end
        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        # Binary mode: archive contents must be written unmodified.
        File.open(path, "wb") do |fd|
          # eof? check lets us skip empty files. Necessary because the API provided by
          # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
          # IO object. Something about empty files in this EntryStream causes
          # IO.copy_stream to throw "can't convert nil into String" on JRuby
          # TODO(sissel): File a bug about this.
          until entry.eof?
            chunk = entry.read(16384)
            fd.write(chunk)
          end
          #IO.copy_stream(entry, fd)
        end
        File.chmod(entry_mode, path)
      end
    end
  ensure
    # Close the archive even when extraction fails part-way.
    tar.close
  end
  File.unlink(tarball) if File.file?(tarball)
end # def untar
133
+
134
# Decompresses the gzip file next to itself (same path without the trailing
# ".gz") and deletes the compressed original afterwards.
#
# Raises ArgumentError when file does not end in ".gz", because the output
# path would then be the input path. If anything fails, the partially written
# output is removed and the error is re-raised.
def ungz(file)
  # Strip only the trailing extension; a ".gz" elsewhere in the path (for
  # example in a directory name) must stay untouched.
  outpath = file.sub(/\.gz\z/, '')
  fail ArgumentError, "#{file} does not end in .gz" if outpath == file

  begin
    # Block form closes the reader even when copying fails.
    Zlib::GzipReader.open(file) do |gz|
      File.open(outpath, "wb") do |out|
        IO::copy_stream(gz, out)
      end
    end
    File.unlink(file)
  rescue
    File.unlink(outpath) if File.file?(outpath)
    raise
  end
end
149
+
150
desc "Process any vendor files required for this plugin"
task "vendor" do |task, args|

  # Each descriptor provides 'url' and 'sha1', and optionally 'files': the
  # list of archive entries to keep.
  @files.each do |file|
    archive = file_fetch(file['url'], file['sha1'])
    # Test the real extension; the previous /.tar.gz/ and /.gz/ patterns had
    # unescaped dots and matched anywhere in the path.
    if archive.end_with?('.tar.gz')
      # Archive name without directory and extension.
      prefix = File.basename(archive, '.tar.gz')
      wanted = file['files']
      untar(archive) do |entry|
        if wanted.nil?
          # Without a 'files' list there was no output name at all and
          # File.join raised TypeError on nil.
          # NOTE(review): extracting every entry under vendor/ with its
          # archive path is an assumption about the intent - confirm.
          out = entry.full_name
        else
          # Entries are compared with the archive name removed from their path.
          next unless wanted.include?(entry.full_name.gsub(prefix, ''))
          # Selected entries are flattened directly into vendor/.
          out = entry.full_name.split("/").last
        end
        File.join('vendor', out)
      end
    elsif archive.end_with?('.gz')
      ungz(archive)
    end
  end

end
@@ -0,0 +1,441 @@
1
+ require "spec_helper"
2
+ require "logstash/filters/prune"
3
+
4
+ # Currently the prune filter has bugs and I can't really tell what the intended
5
+ # behavior is.
6
+ #
7
+ # See the 'whitelist field values with interpolation' test for a commented
8
+ # explanation of my confusion.
9
describe LogStash::Filters::Prune, :if => false do

  saying = "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."

  # Builds a fresh copy of the scalar-valued sample event on every call, so
  # each sample gets its own hash.
  borat = lambda do
    {
      "firstname"    => "Borat",
      "lastname"     => "Sagdiyev",
      "fullname"     => "Borat Sagdiyev",
      "country"      => "Kazakhstan",
      "location"     => "Somethere in Kazakhstan",
      "hobby"        => "Cloud",
      "status"       => "200",
      "Borat_saying" => saying,
      "%{hmm}"       => "doh"
    }
  end

  # Builds a fresh copy of the array-valued sample event on every call.
  arrays = lambda do
    {
      "blah"   => "foo",
      "xxx"    => [ "1 2 3", "3 4 5" ],
      "status" => [ "100", "200", "300", "400", "500" ],
      "error"  => [ "This is foolish" , "Need smthing smart too" ]
    }
  end

  # In every example below, `expected` maps each checked field to the value
  # it must have after filtering (nil meaning the field was pruned).

  describe "defaults" do

    config <<-CONFIG
      filter {
        prune { }
      }
    CONFIG

    expected = borat.call.merge("%{hmm}" => nil)

    sample(borat.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "whitelist field names" do

    config <<-CONFIG
      filter {
        prune {
          whitelist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
        }
      }
    CONFIG

    expected = borat.call.merge(
      "lastname"     => nil,
      "fullname"     => nil,
      "country"      => nil,
      "location"     => nil,
      "Borat_saying" => nil,
      "%{hmm}"       => nil
    )

    sample(borat.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "whitelist field names with interpolation" do

    config <<-CONFIG
      filter {
        prune {
          whitelist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
          interpolate => true
        }
      }
    CONFIG

    expected = borat.call.merge(
      "lastname" => nil,
      "fullname" => nil,
      "country"  => nil,
      "location" => nil,
      "%{hmm}"   => nil
    )

    sample(borat.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "blacklist field names" do

    config <<-CONFIG
      filter {
        prune {
          blacklist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
        }
      }
    CONFIG

    expected = borat.call.merge(
      "firstname" => nil,
      "hobby"     => nil,
      "status"    => nil
    )

    sample(borat.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "blacklist field names with interpolation" do

    config <<-CONFIG
      filter {
        prune {
          blacklist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
          interpolate => true
        }
      }
    CONFIG

    expected = borat.call.merge(
      "firstname"    => nil,
      "hobby"        => nil,
      "status"       => nil,
      "Borat_saying" => nil
    )

    sample(borat.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "whitelist field values" do

    config <<-CONFIG
      filter {
        prune {
          # This should only permit fields named 'firstname', 'fullname',
          # 'location', 'status', etc.
          whitelist_values => [ "firstname", "^Borat$",
                                "fullname", "%{firstname} Sagdiyev",
                                "location", "no no no",
                                "status", "^2",
                                "%{firstname}_saying", "%{hobby}.*Active" ]
        }
      }
    CONFIG

    expected = borat.call.merge(
      # TODO(sissel): According to the config above, 'lastname' should be nil
      # because it is not in the list of whitelisted fields, but we expect it
      # to be "Sagdiyev" ? I am confused.
      "fullname" => nil,
      "location" => nil,
      # TODO(sissel): Contrary to the 'lastname' check, we expect %{hmm} field
      # to be nil because it is not whitelisted, yes? Contradictory insists
      # here. I don't know what the intended behavior is... Seems like
      # whitelist means 'anything not here' but since this test is written
      # confusingly, I don't know how to move forward.
      "%{hmm}"   => nil
    )

    sample(borat.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "whitelist field values with interpolation" do

    config <<-CONFIG
      filter {
        prune {
          whitelist_values => [ "firstname", "^Borat$",
                                "fullname", "%{firstname} Sagdiyev",
                                "location", "no no no",
                                "status", "^2",
                                "%{firstname}_saying", "%{hobby}.*Active" ]
          interpolate => true
        }
      }
    CONFIG

    expected = borat.call.merge(
      "location" => nil,
      "%{hmm}"   => nil
    )

    sample(borat.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "blacklist field values" do

    config <<-CONFIG
      filter {
        prune {
          blacklist_values => [ "firstname", "^Borat$",
                                "fullname", "%{firstname} Sagdiyev",
                                "location", "no no no",
                                "status", "^2",
                                "%{firstname}_saying", "%{hobby}.*Active" ]
        }
      }
    CONFIG

    expected = borat.call.merge(
      "firstname" => nil,
      "status"    => nil,
      "%{hmm}"    => nil
    )

    sample(borat.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "blacklist field values with interpolation" do

    config <<-CONFIG
      filter {
        prune {
          blacklist_values => [ "firstname", "^Borat$",
                                "fullname", "%{firstname} Sagdiyev",
                                "location", "no no no",
                                "status", "^2",
                                "%{firstname}_saying", "%{hobby}.*Active" ]
          interpolate => true
        }
      }
    CONFIG

    expected = borat.call.merge(
      "firstname"    => nil,
      "fullname"     => nil,
      "status"       => nil,
      "Borat_saying" => nil,
      "%{hmm}"       => nil
    )

    sample(borat.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "whitelist field values on fields witn array values" do

    config <<-CONFIG
      filter {
        prune {
          whitelist_values => [ "status", "^(1|2|3)",
                                "xxx", "3",
                                "error", "%{blah}" ]
        }
      }
    CONFIG

    expected = {
      "blah"   => "foo",
      "error"  => nil,
      "xxx"    => [ "1 2 3", "3 4 5" ],
      "status" => [ "100", "200", "300" ]
    }

    sample(arrays.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "blacklist field values on fields witn array values" do

    config <<-CONFIG
      filter {
        prune {
          blacklist_values => [ "status", "^(1|2|3)",
                                "xxx", "3",
                                "error", "%{blah}" ]
        }
      }
    CONFIG

    expected = {
      "blah"   => "foo",
      "error"  => [ "This is foolish", "Need smthing smart too" ],
      "xxx"    => nil,
      "status" => [ "400", "500" ]
    }

    sample(arrays.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "whitelist field values with interpolation on fields witn array values" do

    config <<-CONFIG
      filter {
        prune {
          whitelist_values => [ "status", "^(1|2|3)",
                                "xxx", "3",
                                "error", "%{blah}" ]
          interpolate => true
        }
      }
    CONFIG

    expected = {
      "blah"   => "foo",
      "error"  => [ "This is foolish" ],
      "xxx"    => [ "1 2 3", "3 4 5" ],
      "status" => [ "100", "200", "300" ]
    }

    sample(arrays.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

  describe "blacklist field values with interpolation on fields witn array values" do

    config <<-CONFIG
      filter {
        prune {
          blacklist_values => [ "status", "^(1|2|3)",
                                "xxx", "3",
                                "error", "%{blah}" ]
          interpolate => true
        }
      }
    CONFIG

    expected = {
      "blah"   => "foo",
      "error"  => [ "Need smthing smart too" ],
      "xxx"    => nil,
      "status" => [ "400", "500" ]
    }

    sample(arrays.call) do
      expected.each { |field, value| insist { subject[field] } == value }
    end
  end

end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-filter-prune
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ description: The prune filter is for pruning event data from fields based on whitelist/blacklist
34
+ of field names or their values (names and values can also be regular expressions)
35
+ email: richard.pijnenburg@elasticsearch.com
36
+ executables: []
37
+ extensions: []
38
+ extra_rdoc_files: []
39
+ files:
40
+ - .gitignore
41
+ - Gemfile
42
+ - Rakefile
43
+ - lib/logstash/filters/prune.rb
44
+ - logstash-filter-prune.gemspec
45
+ - rakelib/publish.rake
46
+ - rakelib/vendor.rake
47
+ - spec/filters/prune_spec.rb
48
+ homepage: http://logstash.net/
49
+ licenses:
50
+ - Apache License (2.0)
51
+ metadata:
52
+ logstash_plugin: 'true'
53
+ group: filter
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ! '>='
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubyforge_project:
70
+ rubygems_version: 2.4.1
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: The prune filter is for pruning event data from fields based on whitelist/blacklist
74
+ of field names or their values (names and values can also be regular expressions)
75
+ test_files:
76
+ - spec/filters/prune_spec.rb