logstash-filter-prune 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ Y2UxMGMyM2U5NTRmMmM4YjFlMDM0NzhlNDY0NWE5NmYxMDQzOTA5Yg==
5
+ data.tar.gz: !binary |-
6
+ NWUzMTUxY2ZhNjY4YzFmNjJlNzM5YmZkYzQ0ODUzNjcyYmZiY2MwZg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZTVhYjVhOGNiODliMWE4ZGU4MTkzZTU3M2Y5ZmUwMzM1ODI0ZjBhOWFmZGNi
10
+ MzRmNDU3YWM0ZWIwNWY4Mjg5ZWMyYjAwM2I3NzEzZjc2ZWIyMTljNWYzZjFl
11
+ OWE4ZDdhMTMzNjg5MWU4ZDc2NjIwNTJiZGJhYmI1NzBkYzc5MWE=
12
+ data.tar.gz: !binary |-
13
+ ZjJiN2ExYTcyMjIzMzViYWY4OTBlOGVkZGExMWE5OTdkOWNjMzhkNzJhOTA2
14
+ OTcxNDYyZmY1NDk2ZDAwMjA4MTdmNDFkZDI0MzkxMDgzYjI4N2NjODVlOGZj
15
+ MWEyMDQ4YWM1Y2Q3MWQ1YjMyZGFjZTFhZmM3MjEwODMzNTZlYTQ=
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over TLS so they cannot be tampered with in transit.
source 'https://rubygems.org'

# Build tooling only: rake runs the tasks, gem_publisher is required by the
# publish task.
gem 'rake'
gem 'gem_publisher'
@@ -0,0 +1,6 @@
1
# Vendored artifacts to download. Starts empty; the "vendor" rake task walks
# this list and reads 'url', 'sha1' and (optionally) 'files' from each entry.
@files = []

# With no explicit target, just list the documented tasks.
task(:default) { system("rake -T") }
6
+
@@ -0,0 +1,150 @@
1
+ # encoding: utf-8
2
+ require "logstash/filters/base"
3
+ require "logstash/namespace"
4
+
5
+
6
+ # The prune filter is for pruning event data from fields based on whitelist/blacklist
7
+ # of field names or their values (names and values can also be regular expressions).
8
+
9
class LogStash::Filters::Prune < LogStash::Filters::Base
  config_name "prune"
  milestone 1

  # Toggle whether configuration fields and values should be interpolated for
  # dynamic values (i.e. passed through event.sprintf for every event).
  # Probably adds some performance overhead. Defaults to false.
  config :interpolate, :validate => :boolean, :default => false

  # Include fields only if their names match the specified regexps. Defaults
  # to an empty list, which means include everything.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags            => [ "apache-accesslog" ]
  #         whitelist_names => [ "method", "(referrer|status)", "%{some}_field" ]
  #       }
  #     }
  config :whitelist_names, :validate => :array, :default => []

  # Exclude fields whose names match the specified regexps. By default,
  # unresolved %{field} strings are excluded.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags            => [ "apache-accesslog" ]
  #         blacklist_names => [ "method", "(referrer|status)", "%{some}_field" ]
  #       }
  #     }
  config :blacklist_names, :validate => :array, :default => [ "%\{[^}]+\}" ]

  # Include the specified fields only if their values match the regexps.
  # When a field value is an array, it is pruned per array item, so only
  # matching array items are kept.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags             => [ "apache-accesslog" ]
  #         whitelist_values => [ "uripath", "/index.php",
  #                               "method", "(GET|POST)",
  #                               "status", "^[^2]" ]
  #       }
  #     }
  config :whitelist_values, :validate => :hash, :default => {}

  # Exclude the specified fields if their values match the regexps.
  # When a field value is an array, it is pruned per array item; if every
  # array item matches, the whole field is deleted.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags             => [ "apache-accesslog" ]
  #         blacklist_values => [ "uripath", "/index.php",
  #                               "method", "(HEAD|OPTIONS)",
  #                               "status", "^[^2]" ]
  #       }
  #     }
  config :blacklist_values, :validate => :hash, :default => {}

  # Precompiles every configured pattern when interpolation is disabled.
  # With interpolation enabled nothing is compiled here, because the patterns
  # have to be expanded against each event inside #filter.
  public
  def register
    unless @interpolate
      @whitelist_names_regexp = Regexp.union(@whitelist_names.map {|x| Regexp.new(x)})
      @blacklist_names_regexp = Regexp.union(@blacklist_names.map {|x| Regexp.new(x)})
      @whitelist_values.each do |key, value|
        @whitelist_values[key] = Regexp.new(value)
      end
      @blacklist_values.each do |key, value|
        @blacklist_values[key] = Regexp.new(value)
      end
    end
  end # def register

  # Prunes the fields of +event+ according to the four configured lists.
  #
  # event - the event whose hash (event.to_hash) is modified in place
  public
  def filter(event)
    return unless filter?(event)

    hash = event.to_hash

    # Removals are collected first and only applied at the very end: in
    # interpolation mode, deleting eagerly would give unexpected results
    # because rules with dynamic values would no longer match once the fields
    # they refer to have been removed.
    fields_to_remove = []

    unless @whitelist_names.empty?
      whitelist = prune_names_regexp(@whitelist_names, @whitelist_names_regexp, event)
      hash.each_key do |field|
        fields_to_remove << field unless field.to_s =~ whitelist
      end
    end

    unless @blacklist_names.empty?
      blacklist = prune_names_regexp(@blacklist_names, @blacklist_names_regexp, event)
      hash.each_key do |field|
        fields_to_remove << field if field.to_s =~ blacklist
      end
    end

    prune_collect_values(event, hash, @whitelist_values, false, fields_to_remove)
    prune_collect_values(event, hash, @blacklist_values, true, fields_to_remove)

    fields_to_remove.each do |field|
      if field.is_a?(Hash)
        remaining = hash[field[:key]]
        # The whole field may already have been deleted by another rule;
        # subtracting from nil would raise.
        next unless remaining.is_a?(Array)
        hash[field[:key]] = remaining - field[:values]
      else
        hash.delete(field)
      end
    end

    filter_matched(event)
  end # def filter

  # Returns the regexp that field names are tested against. In interpolation
  # mode it is rebuilt from +patterns+ for this event and kept in a local, so
  # handling one event never changes plugin state seen by another.
  private
  def prune_names_regexp(patterns, precompiled, event)
    return precompiled unless @interpolate
    Regexp.union(patterns.map {|x| Regexp.new(event.sprintf(x))})
  end

  # Appends to +fields_to_remove+ the removals demanded by one value list.
  #
  # rules           - field name => pattern (already a Regexp unless interpolating)
  # remove_on_match - true for blacklist semantics, false for whitelist semantics
  #
  # A plain field name is appended when the whole field must go; a
  # { :key, :values } hash is appended when only some array items must go.
  private
  def prune_collect_values(event, hash, rules, remove_on_match, fields_to_remove)
    rules.each do |key, value|
      if @interpolate
        key = event.sprintf(key)
        value = Regexp.new(event.sprintf(value))
      end

      current = hash[key]
      # Only absent (nil) fields are skipped; a field holding false is still checked.
      next if current.nil?

      if current.is_a?(Array)
        doomed = current.select {|x| prune_value_matches?(x, value) == remove_on_match}
        next if doomed.empty?
        fields_to_remove << (doomed.length == current.length ? key : { :key => key, :values => doomed })
      elsif prune_value_matches?(current, value) == remove_on_match
        fields_to_remove << key
      end
    end
  end

  # True when the string form of +item+ matches +regexp+. Going through to_s
  # keeps non-string values (numbers, booleans, nested structures) from
  # raising NoMethodError.
  private
  def prune_value_matches?(item, regexp)
    !(regexp =~ item.to_s).nil?
  end
end # class LogStash::Filters::Prune
@@ -0,0 +1,26 @@
1
# Gem specification for the prune filter plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-filter-prune'
  s.version         = '0.1.0'
  s.licenses        = ['Apache License (2.0)']
  s.summary         = "The prune filter is for pruning event data from fields based on whitelist/blacklist of field names or their values (names and values can also be regular expressions)"
  s.description     = "The prune filter is for pruning event data from fields based on whitelist/blacklist of field names or their values (names and values can also be regular expressions)"
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths   = ["lib"]

  # Files
  # `git ls-files` prints one path per line. Split on the newline explicitly:
  # `$\` is nil by default, which would split on any whitespace and break
  # paths that contain spaces.
  s.files = `git ls-files`.split("\n")

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

end
26
+
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
desc "Publish gem to RubyGems.org"
task :publish_gem do
  # Look for the gemspec one directory above the directory holding this file.
  # Sorting makes the pick deterministic, since Dir.glob order is not guaranteed.
  gemspec = Dir.glob(File.expand_path('../*.gemspec', File.dirname(__FILE__))).sort.first
  fail "No .gemspec file found, nothing to publish" if gemspec.nil?

  # publish_if_updated's result is only reported when it is truthy.
  published = GemPublisher.publish_if_updated(gemspec, :rubygems)
  puts "Published #{published}" if published
end
9
+
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
+ def vendor(*args)
6
+ return File.join("vendor", *args)
7
+ end
8
+
9
# Directory task for "vendor/"; it lists the "vendor" task as a prerequisite
# and creates the directory named by the task itself.
directory("vendor/" => ["vendor"]) { |dir_task, _args| mkdir dir_task.name }
12
+
13
# Downloads +url+ to +output+ and verifies the payload against +sha1+.
#
# url    - address of the file to download
# sha1   - expected hex SHA1 digest of the downloaded content
# output - path the download is written to
#
# Raises RuntimeError when the digest of the downloaded data differs from
# +sha1+. The rejected file is deleted first so that a corrupt artifact is
# never left behind at +output+.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  actual_sha1 = download(url, output)
  return if actual_sha1 == sha1

  File.unlink(output) if File.file?(output)
  fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
end # def fetch
22
+
23
# Ensures the file behind +url+ is present under vendor/ with the expected
# checksum, downloading it only when it is missing or its SHA1 differs.
#
# url  - address of the file; its basename becomes the local file name
# sha1 - expected hex SHA1 digest
#
# The work is wrapped in a rake task (depending on "vendor/") that is invoked
# immediately.
#
# Returns the local path ("vendor/<basename>").
def file_fetch(url, sha1)
  output = File.join("vendor", File.basename(URI(url).path))
  task output => [ "vendor/" ] do
    # Only the checksum probe is guarded: a missing file simply means nothing
    # has been downloaded yet. Errors raised by the download itself must
    # propagate instead of silently triggering a second download.
    cached_sha1 = begin
      file_sha1(output)
    rescue Errno::ENOENT
      nil
    end
    fetch(url, sha1, output) unless cached_sha1 == sha1
  end.invoke

  output
end
39
+
40
# Computes the SHA1 digest of the file at +path+.
#
# The file is read in binary mode, so the digest covers the exact bytes on
# disk on every platform, and the handle is closed automatically.
#
# Returns the digest as a lowercase hex String.
# Raises Errno::ENOENT when +path+ does not exist.
def file_sha1(path)
  Digest::SHA1.file(path).hexdigest
end
54
+
55
# Streams +url+ into +output+ and returns the SHA1 of the received bytes.
#
# url    - http:// or https:// address to fetch
# output - destination path; data is first written to "<output>.tmp" and only
#          renamed into place once the transfer has completed
#
# Returns the hex SHA1 digest (String) of the downloaded body.
# Raises RuntimeError when the server answers with a status other than 200 or
# 301; re-raises SocketError after logging it.
def download(url, output)
  uri = URI(url)
  digest = Digest::SHA1.new
  tmp = "#{output}.tmp"
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string, which uri.path would drop.
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # response.code is a String, so it has to be compared against strings.
      # NOTE(review): 301 is kept from the original status list, but redirects
      # are not followed here.
      unless %w[200 301].include?(response.code)
        fail "HTTP fetch failed for #{url}. #{response}"
      end
      # A missing Content-Length yields 0, which disables the progress display.
      size = response["content-length"].to_i.to_f
      count = 0
      # Binary mode: the payload must reach the disk byte-for-byte.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  # Never leave a partial download behind.
  File.unlink(tmp) if File.exist?(tmp)
end # def download
88
+
89
# Extracts selected entries of a gzipped tarball and deletes the tarball
# once every entry has been processed.
#
# tarball - path to the .tar.gz file
# block   - called with each archive entry; returns the destination path for
#           that entry, or nil to skip it
#
# Regular files whose size and permission bits already match the archive
# entry are not rewritten.
def untar(tarball, &block)
  require "archive/tar/minitar"
  require "zlib"
  tgz = Zlib::GzipReader.new(File.open(tarball, "rb"))
  begin
    tar = Archive::Tar::Minitar::Input.open(tgz)
    begin
      tar.each do |entry|
        path = block.call(entry)
        next if path.nil?
        parent = File.dirname(path)

        mkdir_p parent unless File.directory?(parent)

        if entry.directory?
          mkdir path unless File.directory?(path)
        else
          entry_mode = entry.instance_eval { @mode } & 0777
          # File.exist? is used because File.exists? is gone in current Rubies.
          if File.exist?(path)
            stat = File.stat(path)
            # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
            # expose headers in the entry.
            entry_size = entry.instance_eval { @size }
            # If file size and mode are the same, skip writing.
            next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
          end
          puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
          File.open(path, "wb") do |fd|
            # eof? check lets us skip empty files. Necessary because the API provided by
            # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
            # IO object. Something about empty files in this EntryStream causes
            # IO.copy_stream to throw "can't convert nil into String" on JRuby
            # TODO(sissel): File a bug about this.
            while !entry.eof?
              chunk = entry.read(16384)
              fd.write(chunk)
            end
            #IO.copy_stream(entry, fd)
          end
          File.chmod(entry_mode, path)
        end
      end
    ensure
      tar.close
    end
  ensure
    # Release the gzip reader even when extraction fails; the guard covers
    # the case where closing the tar input already closed it.
    tgz.close unless tgz.closed?
  end
  File.unlink(tarball) if File.file?(tarball)
end # def untar
133
+
134
# Decompresses a gzip file next to itself and removes the compressed original.
#
# file - path ending in ".gz"; the output path is the same path without that
#        final suffix (other ".gz" occurrences in the path are left alone)
#
# Raises ArgumentError when +file+ does not end in ".gz", since the output
# path would otherwise be the input itself. If decompression fails, the
# partial output is deleted and the error re-raised.
def ungz(file)
  require "zlib"
  unless file.end_with?(".gz")
    raise ArgumentError, "expected a path ending in .gz, got #{file.inspect}"
  end
  outpath = file.sub(/\.gz\z/, '')

  # The block form opens in binary mode and closes the reader on every path.
  Zlib::GzipReader.open(file) do |gz|
    begin
      File.open(outpath, "wb") do |out|
        IO::copy_stream(gz, out)
      end
    rescue
      File.unlink(outpath) if File.file?(outpath)
      raise
    end
  end

  # Only drop the compressed original after the reader has been closed.
  File.unlink(file)
end
149
+
150
desc "Process any vendor files required for this plugin"
task "vendor" do |task, args|

  # Each @files entry is a hash with 'url', 'sha1' and an optional 'files'
  # list restricting which archive members are extracted.
  @files.each do |file|
    archive = file_fetch(file['url'], file['sha1'])
    if archive =~ /\.tar\.gz\z/
      # Archive name without directory and extension.
      prefix = archive.sub(/\.tar\.gz\z/, '').sub(/\Avendor\//, '')
      wanted = file['files']
      untar(archive) do |entry|
        # Without a 'files' list every entry is extracted; previously the
        # destination name was only computed when the list was present.
        next unless wanted.nil? || wanted.include?(entry.full_name.gsub(prefix, ''))
        # Entries are placed directly under vendor/ using their last path component.
        File.join('vendor', entry.full_name.split("/").last)
      end
    elsif archive =~ /\.gz\z/
      ungz(archive)
    end
  end

end
@@ -0,0 +1,441 @@
1
+ require "spec_helper"
2
+ require "logstash/filters/prune"
3
+
4
+ # Currently the prune filter has bugs and I can't really tell what the intended
5
+ # behavior is.
6
+ #
7
+ # See the 'whitelist field values with interpolation' test for a commented
8
+ # explanation of my confusion.
9
+ describe LogStash::Filters::Prune, :if => false do
10
+
11
+
12
+ describe "defaults" do
13
+
14
+ config <<-CONFIG
15
+ filter {
16
+ prune { }
17
+ }
18
+ CONFIG
19
+
20
+ sample(
21
+ "firstname" => "Borat",
22
+ "lastname" => "Sagdiyev",
23
+ "fullname" => "Borat Sagdiyev",
24
+ "country" => "Kazakhstan",
25
+ "location" => "Somethere in Kazakhstan",
26
+ "hobby" => "Cloud",
27
+ "status" => "200",
28
+ "Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
29
+ "%{hmm}" => "doh"
30
+ ) do
31
+ insist { subject["firstname"] } == "Borat"
32
+ insist { subject["lastname"] } == "Sagdiyev"
33
+ insist { subject["fullname"] } == "Borat Sagdiyev"
34
+ insist { subject["country"] } == "Kazakhstan"
35
+ insist { subject["location"] } == "Somethere in Kazakhstan"
36
+ insist { subject["hobby"] } == "Cloud"
37
+ insist { subject["status"] } == "200"
38
+ insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
39
+ insist { subject["%{hmm}"] } == nil
40
+ end
41
+ end
42
+
43
+ describe "whitelist field names" do
44
+
45
+ config <<-CONFIG
46
+ filter {
47
+ prune {
48
+ whitelist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
49
+ }
50
+ }
51
+ CONFIG
52
+
53
+ sample(
54
+ "firstname" => "Borat",
55
+ "lastname" => "Sagdiyev",
56
+ "fullname" => "Borat Sagdiyev",
57
+ "country" => "Kazakhstan",
58
+ "location" => "Somethere in Kazakhstan",
59
+ "hobby" => "Cloud",
60
+ "status" => "200",
61
+ "Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
62
+ "%{hmm}" => "doh"
63
+ ) do
64
+ insist { subject["firstname"] } == "Borat"
65
+ insist { subject["lastname"] } == nil
66
+ insist { subject["fullname"] } == nil
67
+ insist { subject["country"] } == nil
68
+ insist { subject["location"] } == nil
69
+ insist { subject["hobby"] } == "Cloud"
70
+ insist { subject["status"] } == "200"
71
+ insist { subject["Borat_saying"] } == nil
72
+ insist { subject["%{hmm}"] } == nil
73
+ end
74
+ end
75
+
76
+ describe "whitelist field names with interpolation" do
77
+
78
+ config <<-CONFIG
79
+ filter {
80
+ prune {
81
+ whitelist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
82
+ interpolate => true
83
+ }
84
+ }
85
+ CONFIG
86
+
87
+ sample(
88
+ "firstname" => "Borat",
89
+ "lastname" => "Sagdiyev",
90
+ "fullname" => "Borat Sagdiyev",
91
+ "country" => "Kazakhstan",
92
+ "location" => "Somethere in Kazakhstan",
93
+ "hobby" => "Cloud",
94
+ "status" => "200",
95
+ "Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
96
+ "%{hmm}" => "doh"
97
+ ) do
98
+ insist { subject["firstname"] } == "Borat"
99
+ insist { subject["lastname"] } == nil
100
+ insist { subject["fullname"] } == nil
101
+ insist { subject["country"] } == nil
102
+ insist { subject["location"] } == nil
103
+ insist { subject["hobby"] } == "Cloud"
104
+ insist { subject["status"] } == "200"
105
+ insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
106
+ insist { subject["%{hmm}"] } == nil
107
+ end
108
+ end
109
+
110
+ describe "blacklist field names" do
111
+
112
+ config <<-CONFIG
113
+ filter {
114
+ prune {
115
+ blacklist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
116
+ }
117
+ }
118
+ CONFIG
119
+
120
+ sample(
121
+ "firstname" => "Borat",
122
+ "lastname" => "Sagdiyev",
123
+ "fullname" => "Borat Sagdiyev",
124
+ "country" => "Kazakhstan",
125
+ "location" => "Somethere in Kazakhstan",
126
+ "hobby" => "Cloud",
127
+ "status" => "200",
128
+ "Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
129
+ "%{hmm}" => "doh"
130
+ ) do
131
+ insist { subject["firstname"] } == nil
132
+ insist { subject["lastname"] } == "Sagdiyev"
133
+ insist { subject["fullname"] } == "Borat Sagdiyev"
134
+ insist { subject["country"] } == "Kazakhstan"
135
+ insist { subject["location"] } == "Somethere in Kazakhstan"
136
+ insist { subject["hobby"] } == nil
137
+ insist { subject["status"] } == nil
138
+ insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
139
+ insist { subject["%{hmm}"] } == "doh"
140
+ end
141
+ end
142
+
143
+ describe "blacklist field names with interpolation" do
144
+
145
+ config <<-CONFIG
146
+ filter {
147
+ prune {
148
+ blacklist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
149
+ interpolate => true
150
+ }
151
+ }
152
+ CONFIG
153
+
154
+ sample(
155
+ "firstname" => "Borat",
156
+ "lastname" => "Sagdiyev",
157
+ "fullname" => "Borat Sagdiyev",
158
+ "country" => "Kazakhstan",
159
+ "location" => "Somethere in Kazakhstan",
160
+ "hobby" => "Cloud",
161
+ "status" => "200",
162
+ "Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
163
+ "%{hmm}" => "doh"
164
+ ) do
165
+ insist { subject["firstname"] } == nil
166
+ insist { subject["lastname"] } == "Sagdiyev"
167
+ insist { subject["fullname"] } == "Borat Sagdiyev"
168
+ insist { subject["country"] } == "Kazakhstan"
169
+ insist { subject["location"] } == "Somethere in Kazakhstan"
170
+ insist { subject["hobby"] } == nil
171
+ insist { subject["status"] } == nil
172
+ insist { subject["Borat_saying"] } == nil
173
+ insist { subject["%{hmm}"] } == "doh"
174
+ end
175
+ end
176
+
177
+ describe "whitelist field values" do
178
+
179
+ config <<-CONFIG
180
+ filter {
181
+ prune {
182
+ # This should only permit fields named 'firstname', 'fullname',
183
+ # 'location', 'status', etc.
184
+ whitelist_values => [ "firstname", "^Borat$",
185
+ "fullname", "%{firstname} Sagdiyev",
186
+ "location", "no no no",
187
+ "status", "^2",
188
+ "%{firstname}_saying", "%{hobby}.*Active" ]
189
+ }
190
+ }
191
+ CONFIG
192
+
193
+ sample(
194
+ "firstname" => "Borat",
195
+ "lastname" => "Sagdiyev",
196
+ "fullname" => "Borat Sagdiyev",
197
+ "country" => "Kazakhstan",
198
+ "location" => "Somethere in Kazakhstan",
199
+ "hobby" => "Cloud",
200
+ "status" => "200",
201
+ "Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
202
+ "%{hmm}" => "doh"
203
+ ) do
204
+ insist { subject["firstname"] } == "Borat"
205
+
206
+ # TODO(sissel): According to the config above, this should be nil because
207
+ # it is not in the list of whitelisted fields, but we expect it to be
208
+ # "Sagdiyev" ? I am confused.
209
+ insist { subject["lastname"] } == "Sagdiyev"
210
+ insist { subject["fullname"] } == nil
211
+ insist { subject["country"] } == "Kazakhstan"
212
+ insist { subject["location"] } == nil
213
+ insist { subject["hobby"] } == "Cloud"
214
+ insist { subject["status"] } == "200"
215
+ insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
216
+
217
+ # TODO(sissel): Contrary to the 'lastname' check, we expect %{hmm} field
218
+ # to be nil because it is not whitelisted, yes? Contradictory insists
219
+ # here. I don't know what the intended behavior is... Seems like
220
+ # whitelist means 'anything not here' but since this test is written
221
+ # confusingly, I don't know how to move forward.
222
+ insist { subject["%{hmm}"] } == nil
223
+ end
224
+ end
225
+
226
+ describe "whitelist field values with interpolation" do
227
+
228
+ config <<-CONFIG
229
+ filter {
230
+ prune {
231
+ whitelist_values => [ "firstname", "^Borat$",
232
+ "fullname", "%{firstname} Sagdiyev",
233
+ "location", "no no no",
234
+ "status", "^2",
235
+ "%{firstname}_saying", "%{hobby}.*Active" ]
236
+ interpolate => true
237
+ }
238
+ }
239
+ CONFIG
240
+
241
+ sample(
242
+ "firstname" => "Borat",
243
+ "lastname" => "Sagdiyev",
244
+ "fullname" => "Borat Sagdiyev",
245
+ "country" => "Kazakhstan",
246
+ "location" => "Somethere in Kazakhstan",
247
+ "hobby" => "Cloud",
248
+ "status" => "200",
249
+ "Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
250
+ "%{hmm}" => "doh"
251
+ ) do
252
+ insist { subject["firstname"] } == "Borat"
253
+ insist { subject["lastname"] } == "Sagdiyev"
254
+ insist { subject["fullname"] } == "Borat Sagdiyev"
255
+ insist { subject["country"] } == "Kazakhstan"
256
+ insist { subject["location"] } == nil
257
+ insist { subject["hobby"] } == "Cloud"
258
+ insist { subject["status"] } == "200"
259
+ insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
260
+ insist { subject["%{hmm}"] } == nil
261
+ end
262
+ end
263
+
264
+ describe "blacklist field values" do
265
+
266
+ config <<-CONFIG
267
+ filter {
268
+ prune {
269
+ blacklist_values => [ "firstname", "^Borat$",
270
+ "fullname", "%{firstname} Sagdiyev",
271
+ "location", "no no no",
272
+ "status", "^2",
273
+ "%{firstname}_saying", "%{hobby}.*Active" ]
274
+ }
275
+ }
276
+ CONFIG
277
+
278
+ sample(
279
+ "firstname" => "Borat",
280
+ "lastname" => "Sagdiyev",
281
+ "fullname" => "Borat Sagdiyev",
282
+ "country" => "Kazakhstan",
283
+ "location" => "Somethere in Kazakhstan",
284
+ "hobby" => "Cloud",
285
+ "status" => "200",
286
+ "Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
287
+ "%{hmm}" => "doh"
288
+ ) do
289
+ insist { subject["firstname"] } == nil
290
+ insist { subject["lastname"] } == "Sagdiyev"
291
+ insist { subject["fullname"] } == "Borat Sagdiyev"
292
+ insist { subject["country"] } == "Kazakhstan"
293
+ insist { subject["location"] } == "Somethere in Kazakhstan"
294
+ insist { subject["hobby"] } == "Cloud"
295
+ insist { subject["status"] } == nil
296
+ insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
297
+ insist { subject["%{hmm}"] } == nil
298
+ end
299
+ end
300
+
301
+ describe "blacklist field values with interpolation" do
302
+
303
+ config <<-CONFIG
304
+ filter {
305
+ prune {
306
+ blacklist_values => [ "firstname", "^Borat$",
307
+ "fullname", "%{firstname} Sagdiyev",
308
+ "location", "no no no",
309
+ "status", "^2",
310
+ "%{firstname}_saying", "%{hobby}.*Active" ]
311
+ interpolate => true
312
+ }
313
+ }
314
+ CONFIG
315
+
316
+ sample(
317
+ "firstname" => "Borat",
318
+ "lastname" => "Sagdiyev",
319
+ "fullname" => "Borat Sagdiyev",
320
+ "country" => "Kazakhstan",
321
+ "location" => "Somethere in Kazakhstan",
322
+ "hobby" => "Cloud",
323
+ "status" => "200",
324
+ "Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
325
+ "%{hmm}" => "doh"
326
+ ) do
327
+ insist { subject["firstname"] } == nil
328
+ insist { subject["lastname"] } == "Sagdiyev"
329
+ insist { subject["fullname"] } == nil
330
+ insist { subject["country"] } == "Kazakhstan"
331
+ insist { subject["location"] } == "Somethere in Kazakhstan"
332
+ insist { subject["hobby"] } == "Cloud"
333
+ insist { subject["status"] } == nil
334
+ insist { subject["Borat_saying"] } == nil
335
+ insist { subject["%{hmm}"] } == nil
336
+ end
337
+ end
338
+
339
+ describe "whitelist field values on fields witn array values" do
340
+
341
+ config <<-CONFIG
342
+ filter {
343
+ prune {
344
+ whitelist_values => [ "status", "^(1|2|3)",
345
+ "xxx", "3",
346
+ "error", "%{blah}" ]
347
+ }
348
+ }
349
+ CONFIG
350
+
351
+ sample(
352
+ "blah" => "foo",
353
+ "xxx" => [ "1 2 3", "3 4 5" ],
354
+ "status" => [ "100", "200", "300", "400", "500" ],
355
+ "error" => [ "This is foolish" , "Need smthing smart too" ]
356
+ ) do
357
+ insist { subject["blah"] } == "foo"
358
+ insist { subject["error"] } == nil
359
+ insist { subject["xxx"] } == [ "1 2 3", "3 4 5" ]
360
+ insist { subject["status"] } == [ "100", "200", "300" ]
361
+ end
362
+ end
363
+
364
+ describe "blacklist field values on fields witn array values" do
365
+
366
+ config <<-CONFIG
367
+ filter {
368
+ prune {
369
+ blacklist_values => [ "status", "^(1|2|3)",
370
+ "xxx", "3",
371
+ "error", "%{blah}" ]
372
+ }
373
+ }
374
+ CONFIG
375
+
376
+ sample(
377
+ "blah" => "foo",
378
+ "xxx" => [ "1 2 3", "3 4 5" ],
379
+ "status" => [ "100", "200", "300", "400", "500" ],
380
+ "error" => [ "This is foolish", "Need smthing smart too" ]
381
+ ) do
382
+ insist { subject["blah"] } == "foo"
383
+ insist { subject["error"] } == [ "This is foolish", "Need smthing smart too" ]
384
+ insist { subject["xxx"] } == nil
385
+ insist { subject["status"] } == [ "400", "500" ]
386
+ end
387
+ end
388
+
389
+ describe "whitelist field values with interpolation on fields witn array values" do
390
+
391
+ config <<-CONFIG
392
+ filter {
393
+ prune {
394
+ whitelist_values => [ "status", "^(1|2|3)",
395
+ "xxx", "3",
396
+ "error", "%{blah}" ]
397
+ interpolate => true
398
+ }
399
+ }
400
+ CONFIG
401
+
402
+ sample(
403
+ "blah" => "foo",
404
+ "xxx" => [ "1 2 3", "3 4 5" ],
405
+ "status" => [ "100", "200", "300", "400", "500" ],
406
+ "error" => [ "This is foolish" , "Need smthing smart too" ]
407
+ ) do
408
+ insist { subject["blah"] } == "foo"
409
+ insist { subject["error"] } == [ "This is foolish" ]
410
+ insist { subject["xxx"] } == [ "1 2 3", "3 4 5" ]
411
+ insist { subject["status"] } == [ "100", "200", "300" ]
412
+ end
413
+ end
414
+
415
+ describe "blacklist field values with interpolation on fields witn array values" do
416
+
417
+ config <<-CONFIG
418
+ filter {
419
+ prune {
420
+ blacklist_values => [ "status", "^(1|2|3)",
421
+ "xxx", "3",
422
+ "error", "%{blah}" ]
423
+ interpolate => true
424
+ }
425
+ }
426
+ CONFIG
427
+
428
+ sample(
429
+ "blah" => "foo",
430
+ "xxx" => [ "1 2 3", "3 4 5" ],
431
+ "status" => [ "100", "200", "300", "400", "500" ],
432
+ "error" => [ "This is foolish" , "Need smthing smart too" ]
433
+ ) do
434
+ insist { subject["blah"] } == "foo"
435
+ insist { subject["error"] } == [ "Need smthing smart too" ]
436
+ insist { subject["xxx"] } == nil
437
+ insist { subject["status"] } == [ "400", "500" ]
438
+ end
439
+ end
440
+
441
+ end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-filter-prune
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ description: The prune filter is for pruning event data from fields based on whitelist/blacklist
34
+ of field names or their values (names and values can also be regular expressions)
35
+ email: richard.pijnenburg@elasticsearch.com
36
+ executables: []
37
+ extensions: []
38
+ extra_rdoc_files: []
39
+ files:
40
+ - .gitignore
41
+ - Gemfile
42
+ - Rakefile
43
+ - lib/logstash/filters/prune.rb
44
+ - logstash-filter-prune.gemspec
45
+ - rakelib/publish.rake
46
+ - rakelib/vendor.rake
47
+ - spec/filters/prune_spec.rb
48
+ homepage: http://logstash.net/
49
+ licenses:
50
+ - Apache License (2.0)
51
+ metadata:
52
+ logstash_plugin: 'true'
53
+ group: filter
54
+ post_install_message:
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ! '>='
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ! '>='
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubyforge_project:
70
+ rubygems_version: 2.4.1
71
+ signing_key:
72
+ specification_version: 4
73
+ summary: The prune filter is for pruning event data from fields based on whitelist/blacklist
74
+ of field names or their values (names and values can also be regular expressions)
75
+ test_files:
76
+ - spec/filters/prune_spec.rb