logstash-filter-anonymize 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MmY3Y2M2MTY4MWI0YWQ3ZjI2M2Q3Yjk2NDUxZTNhMjI0OTFjMDRhZA==
5
+ data.tar.gz: !binary |-
6
+ NWNiYjg3ZWZkZjQ2YjI5Nzc4ZTZkZTA4OTgyODZkN2ZhMWU0YzMxZA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MmUzMWU5Nzk4YjllYzJkNWIzN2NjNmJhMzQ5Nzg5NjVhNzlmOGNlZDkzNmM4
10
+ NjZkODU3OGMyMGE2MmE5OTY1NGFjZTUzMWE5ZDhhNGVmN2ZkZTgyY2ZmMjAx
11
+ OGVlOWMyOWUwMWUyY2FjZjEyNjJmNGIzMzdkNTUwNzcyZTEwYTY=
12
+ data.tar.gz: !binary |-
13
+ ZTMzNjk0ZGJiYjIyMjk5ZWNiYWI5ZjRiOWNhYTU4ZWFiN2Y0NTA0YjI3Mjhh
14
+ NWQ1NzczZDk1ZmUxNjVhMzhkY2Q1MzQ5OTdhYmVmZDM1MDc5YjYwZjY1ZjJj
15
+ YTc0N2YxYWY4MDE5ZThiNmJiYjEwODIwNWZjZjA4ZWY3YjNkMzI=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Development dependencies for building and publishing the plugin.
# Use HTTPS so gem downloads are authenticated and cannot be tampered with
# in transit (plain http:// sources are insecure).
source 'https://rubygems.org'
gem 'rake'
gem 'gem_publisher'
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ @files=[]
2
+
3
+ task :default do
4
+ system("rake -T")
5
+ end
6
+
@@ -0,0 +1,95 @@
1
+ # encoding: utf-8
2
+ require "logstash/filters/base"
3
+ require "logstash/namespace"
4
+
5
# Anonymize fields by replacing their values with a consistent hash.
#
# The concrete strategy is chosen once in #register from the `algorithm`
# setting and exposed on this instance as `anonymize`:
#   * IPV4_NETWORK - masks the address down to its network (key = prefix length)
#   * MURMUR3      - 32-bit MurmurHash3 of the value (key is ignored)
#   * anything else - HMAC hex digest of the value, keyed with `key`
class LogStash::Filters::Anonymize < LogStash::Filters::Base
  config_name "anonymize"
  milestone 1

  # The fields to be anonymized
  config :fields, :validate => :array, :required => true

  # Hashing key
  # When using MURMUR3 the key is ignored but must still be set.
  # When using IPV4_NETWORK the key is the subnet prefix length.
  config :key, :validate => :string, :required => true

  # digest/hash type
  config :algorithm, :validate => ['SHA1', 'SHA256', 'SHA384', 'SHA512', 'MD5', "MURMUR3", "IPV4_NETWORK"], :required => true, :default => 'SHA1'

  public
  # Loads the library needed by the configured algorithm and aliases the
  # matching anonymize_* method to `anonymize` on this instance only.
  def register
    # require any library and set the anonymize function
    case @algorithm
    when "IPV4_NETWORK"
      require 'ipaddr'
      class << self; alias_method :anonymize, :anonymize_ipv4_network; end
    when "MURMUR3"
      require "murmurhash3"
      class << self; alias_method :anonymize, :anonymize_murmur3; end
    else
      require 'openssl'
      class << self; alias_method :anonymize, :anonymize_openssl; end
    end
  end # def register

  public
  # Replaces each configured field present on the event with its anonymized
  # value. Array-valued fields are anonymized element by element.
  def filter(event)
    return unless filter?(event)
    @fields.each do |field|
      next unless event.include?(field)
      if event[field].is_a?(Array)
        event[field] = event[field].collect { |v| anonymize(v) }
      else
        event[field] = anonymize(event[field])
      end
    end
  end # def filter

  private
  # Masks an IP address string with the prefix length given in `key` and
  # returns the resulting network address as a UTF-8 string.
  def anonymize_ipv4_network(ip_string)
    # in JRuby 1.7.11 outputs as US-ASCII
    IPAddr.new(ip_string).mask(@key.to_i).to_s.force_encoding(Encoding::UTF_8)
  end

  # Returns the HMAC hex digest of `data`, keyed with `key`, as UTF-8.
  def anonymize_openssl(data)
    digest = algorithm()
    # OpenSSL::HMAC only accepts String data; coerce so numeric field values
    # are hashed instead of raising TypeError.
    # in JRuby 1.7.11 outputs as ASCII-8BIT
    OpenSSL::HMAC.hexdigest(digest, @key, data.to_s).force_encoding(Encoding::UTF_8)
  end

  # Returns the 32-bit MurmurHash3 of an Integer or String value.
  # Any other value type yields nil (the field is blanked rather than hashed).
  def anonymize_murmur3(value)
    case value
    when Integer # Fixnum was deprecated in Ruby 2.4 and removed in 3.2
      MurmurHash3::V32.int_hash(value)
    when String
      MurmurHash3::V32.str_hash(value)
    end
  end

  # Builds a fresh OpenSSL digest object for the configured algorithm.
  # Logs an error for an unrecognised name; config validation on `algorithm`
  # restricts the accepted values, so that branch is not expected to run.
  def algorithm

    case @algorithm
      #when 'SHA'
        #return OpenSSL::Digest::SHA.new
      when 'SHA1'
        return OpenSSL::Digest::SHA1.new
      #when 'SHA224'
        #return OpenSSL::Digest::SHA224.new
      when 'SHA256'
        return OpenSSL::Digest::SHA256.new
      when 'SHA384'
        return OpenSSL::Digest::SHA384.new
      when 'SHA512'
        return OpenSSL::Digest::SHA512.new
      #when 'MD4'
        #return OpenSSL::Digest::MD4.new
      when 'MD5'
        return OpenSSL::Digest::MD5.new
      else
        @logger.error("Unknown algorithm")
    end
  end

end # class LogStash::Filters::Anonymize
@@ -0,0 +1,29 @@
1
# Gem specification for the logstash anonymize filter plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-filter-anonymize'
  s.version         = '0.1.0'
  # SPDX identifier, as recommended by RubyGems for the licenses field.
  s.licenses        = ['Apache-2.0']
  s.summary         = "Anonymize fields using by replacing values with a consistent hash"
  s.description     = "Anonymize fields using by replacing values with a consistent hash"
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths = ["lib"]

  # Files
  # Split on newlines explicitly: `$\` (output record separator) is nil by
  # default, which makes String#split break on any whitespace and therefore
  # mangle file names that contain spaces.
  s.files = `git ls-files`.split("\n") + ::Dir.glob('vendor/*')

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

  s.add_runtime_dependency 'murmurhash3'
  s.add_runtime_dependency 'jruby-openssl', ['0.9.4']

end
29
+
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
+ desc "Publish gem to RubyGems.org"
4
+ task :publish_gem do |t|
5
+ gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
6
+ gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
7
+ puts "Published #{gem}" if gem
8
+ end
9
+
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
+ def vendor(*args)
6
+ return File.join("vendor", *args)
7
+ end
8
+
9
+ directory "vendor/" => ["vendor"] do |task, args|
10
+ mkdir task.name
11
+ end
12
+
13
+ def fetch(url, sha1, output)
14
+
15
+ puts "Downloading #{url}"
16
+ actual_sha1 = download(url, output)
17
+
18
+ if actual_sha1 != sha1
19
+ fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
20
+ end
21
+ end # def fetch
22
+
23
+ def file_fetch(url, sha1)
24
+ filename = File.basename( URI(url).path )
25
+ output = "vendor/#{filename}"
26
+ task output => [ "vendor/" ] do
27
+ begin
28
+ actual_sha1 = file_sha1(output)
29
+ if actual_sha1 != sha1
30
+ fetch(url, sha1, output)
31
+ end
32
+ rescue Errno::ENOENT
33
+ fetch(url, sha1, output)
34
+ end
35
+ end.invoke
36
+
37
+ return output
38
+ end
39
+
40
# Computes the SHA1 hex digest of the file at +path+.
#
# The file is read in binary mode, in 16 KiB chunks, so the digest covers the
# exact bytes on disk (no newline or encoding translation) without loading
# the whole file into memory.
#
# Raises Errno::ENOENT when the file does not exist.
def file_sha1(path)
  digest = Digest::SHA1.new
  File.open(path, "rb") do |fd|
    # IO#read returns nil at end of file, which ends the loop.
    while (chunk = fd.read(16384))
      digest << chunk
    end
  end
  digest.hexdigest
end
54
+
55
# Downloads +url+ to +output+ and returns the SHA1 hex digest of the bytes
# received.
#
# The body is streamed to "#{output}.tmp" and renamed into place only after
# the transfer completes; the temp file is removed on any failure. A progress
# percentage is printed when stdout is a TTY and the server sent a
# Content-Length.
#
# Raises RuntimeError when the response status is not 200 or 301, and
# re-raises SocketError after logging it.
def download(url, output)
  uri = URI(url)
  digest = Digest::SHA1.new
  tmp = "#{output}.tmp"
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string and defaults an empty path to "/".
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # Net::HTTPResponse#code is a String, and the fetch must fail when the
      # status is NOT one of the accepted ones.
      # NOTE(review): 301 is accepted but the redirect is not followed, so the
      # caller's SHA1 comparison is what catches a redirect body - confirm intent.
      unless ["200", "301"].include?(response.code)
        fail "HTTP fetch failed for #{url}. #{response}"
      end
      # A missing Content-Length yields 0.0, which disables progress output.
      size = response["content-length"].to_i.to_f
      count = 0
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r      \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  File.unlink(tmp) if File.exist?(tmp)
end # def download
88
+
89
# Extracts entries from the gzip-compressed tar archive +tarball+.
#
# The block is called with each archive entry and returns the destination
# path for it, or nil to skip the entry. Regular files whose size and
# permission bits already match the entry are not rewritten. The tarball is
# deleted after a successful extraction; on failure it is left in place and
# all open handles are closed.
def untar(tarball, &block)
  require "archive/tar/minitar"
  io = File.open(tarball, "rb")
  begin
    tgz = Zlib::GzipReader.new(io)
    tar = Archive::Tar::Minitar::Input.open(tgz)
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)

      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
      else
        entry_mode = entry.instance_eval { @mode } & 0777
        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
        if File.exist?(path)
          stat = File.stat(path)
          # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
          # expose headers in the entry.
          entry_size = entry.instance_eval { @size }
          # If file size and mode are the same, skip writing.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end
        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        # Binary mode: archive members must be written byte-for-byte.
        File.open(path, "wb") do |fd|
          # eof? check lets us skip empty files. Necessary because the API provided by
          # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
          # IO object. Something about empty files in this EntryStream causes
          # IO.copy_stream to throw "can't convert nil into String" on JRuby
          # TODO(sissel): File a bug about this.
          while !entry.eof?
            chunk = entry.read(16384)
            fd.write(chunk)
          end
          #IO.copy_stream(entry, fd)
        end
        File.chmod(entry_mode, path)
      end
    end
  ensure
    # Release every handle even when extraction raises part-way through.
    tar.close if tar
    tgz.close if tgz && !tgz.closed?
    io.close unless io.closed?
  end
  File.unlink(tarball) if File.file?(tarball)
end # def untar
133
+
134
# Decompresses the gzip file +file+ next to itself (same path without the
# trailing ".gz") and deletes the compressed original on success.
#
# On any failure the partially written output is removed, the original is
# kept, and the error is re-raised.
#
# Raises ArgumentError when +file+ does not end in ".gz", because the output
# path would then equal the input path and the input would be truncated.
def ungz(file)
  # Strip only the trailing extension; a ".gz" elsewhere in the path (for
  # example in a directory name) must be left alone.
  outpath = file.sub(/\.gz\z/, '')
  fail ArgumentError, "not a .gz file: #{file}" if outpath == file

  begin
    File.open(file, "rb") do |raw|
      gz = Zlib::GzipReader.new(raw)
      begin
        File.open(outpath, "wb") do |out|
          IO.copy_stream(gz, out)
        end
      ensure
        # finish releases the zlib state; the File.open block closes +raw+.
        gz.finish
      end
    end
    File.unlink(file)
  rescue
    File.unlink(outpath) if File.file?(outpath)
    raise
  end
end
149
+
150
desc "Process any vendor files required for this plugin"
# Fetches every entry in @files (hashes with 'url', 'sha1' and an optional
# 'files' whitelist) and unpacks it under vendor/:
#   * *.tar.gz archives are extracted; with a 'files' whitelist only the listed
#     members are written, flattened to their base name; without a whitelist
#     every member is written under vendor/ using its archive path.
#   * other *.gz files are decompressed in place.
task "vendor" do |task, args|

  @files.each do |file|
    download = file_fetch(file['url'], file['sha1'])
    # Anchor and escape the extension checks so only real suffixes match.
    if download =~ /\.tar\.gz\z/
      prefix = download.sub(/\.tar\.gz\z/, '').sub(/\Avendor\//, '')
      untar(download) do |entry|
        wanted = file['files']
        if wanted.nil?
          # No whitelist: extract everything. Previously `out` stayed nil
          # here and File.join raised TypeError.
          out = entry.full_name
        else
          next unless wanted.include?(entry.full_name.gsub(prefix, ''))
          out = entry.full_name.split("/").last
        end
        File.join('vendor', out)
      end
    elsif download =~ /\.gz\z/
      ungz(download)
    end
  end

end
@@ -0,0 +1,190 @@
1
+ # encoding: utf-8
2
+
3
+ require "spec_helper"
4
+ require "logstash/filters/anonymize"
5
+
6
+ describe LogStash::Filters::Anonymize do
7
+
8
+ describe "anonymize ipaddress with IPV4_NETWORK algorithm" do
9
+ # The logstash config goes here.
10
+ # At this time, only filters are supported.
11
+ config <<-CONFIG
12
+ filter {
13
+ anonymize {
14
+ fields => ["clientip"]
15
+ algorithm => "IPV4_NETWORK"
16
+ key => 24
17
+ }
18
+ }
19
+ CONFIG
20
+
21
+ sample("clientip" => "233.255.13.44") do
22
+ insist { subject["clientip"] } == "233.255.13.0"
23
+ end
24
+ end
25
+
26
+ describe "anonymize string with MURMUR3 algorithm" do
27
+ config <<-CONFIG
28
+ filter {
29
+ anonymize {
30
+ fields => ["clientip"]
31
+ algorithm => "MURMUR3"
32
+ key => ""
33
+ }
34
+ }
35
+ CONFIG
36
+
37
+ sample("clientip" => "123.52.122.33") do
38
+ insist { subject["clientip"] } == 1541804874
39
+ end
40
+ end
41
+
42
+ describe "anonymize string with SHA1 alogrithm" do
43
+ # The logstash config goes here.
44
+ # At this time, only filters are supported.
45
+ config <<-CONFIG
46
+ filter {
47
+ anonymize {
48
+ fields => ["clientip"]
49
+ key => "longencryptionkey"
50
+ algorithm => 'SHA1'
51
+ }
52
+ }
53
+ CONFIG
54
+
55
+ sample("clientip" => "123.123.123.123") do
56
+ insist { subject["clientip"] } == "fdc60acc4773dc5ac569ffb78fcb93c9630797f4"
57
+ end
58
+ end
59
+
60
+ # HMAC-SHA224 isn't implemented in JRuby OpenSSL
61
+ #describe "anonymize string with SHA224 alogrithm" do
62
+ # The logstash config goes here.
63
+ # At this time, only filters are supported.
64
+ #config <<-CONFIG
65
+ #filter {
66
+ #anonymize {
67
+ #fields => ["clientip"]
68
+ #key => "longencryptionkey"
69
+ #algorithm => 'SHA224'
70
+ #}
71
+ #}
72
+ #CONFIG
73
+
74
+ #sample("clientip" => "123.123.123.123") do
75
+ #insist { subject["clientip"] } == "5744bbcc4f64acb6a805b7fee3013a8958cc8782d3fb0fb318cec915"
76
+ #end
77
+ #end
78
+
79
+ describe "anonymize string with SHA256 alogrithm" do
80
+ # The logstash config goes here.
81
+ # At this time, only filters are supported.
82
+ config <<-CONFIG
83
+ filter {
84
+ anonymize {
85
+ fields => ["clientip"]
86
+ key => "longencryptionkey"
87
+ algorithm => 'SHA256'
88
+ }
89
+ }
90
+ CONFIG
91
+
92
+ sample("clientip" => "123.123.123.123") do
93
+ insist { subject["clientip"] } == "345bec3eff242d53b568916c2610b3e393d885d6b96d643f38494fd74bf4a9ca"
94
+ end
95
+ end
96
+
97
+ describe "anonymize string with SHA384 alogrithm" do
98
+ # The logstash config goes here.
99
+ # At this time, only filters are supported.
100
+ config <<-CONFIG
101
+ filter {
102
+ anonymize {
103
+ fields => ["clientip"]
104
+ key => "longencryptionkey"
105
+ algorithm => 'SHA384'
106
+ }
107
+ }
108
+ CONFIG
109
+
110
+ sample("clientip" => "123.123.123.123") do
111
+ insist { subject["clientip"] } == "22d4c0e8c4fbcdc4887d2038fca7650f0e2e0e2457ff41c06eb2a980dded6749561c814fe182aff93e2538d18593947a"
112
+ end
113
+ end
114
+
115
+ describe "anonymize string with SHA512 alogrithm" do
116
+ # The logstash config goes here.
117
+ # At this time, only filters are supported.
118
+ config <<-CONFIG
119
+ filter {
120
+ anonymize {
121
+ fields => ["clientip"]
122
+ key => "longencryptionkey"
123
+ algorithm => 'SHA512'
124
+ }
125
+ }
126
+ CONFIG
127
+
128
+ sample("clientip" => "123.123.123.123") do
129
+ insist { subject["clientip"] } == "11c19b326936c08d6c50a3c847d883e5a1362e6a64dd55201a25f2c1ac1b673f7d8bf15b8f112a4978276d573275e3b14166e17246f670c2a539401c5bfdace8"
130
+ end
131
+ end
132
+
133
+ # HMAC-MD4 isn't implemented in JRuby OpenSSL
134
+ #describe "anonymize string with MD4 alogrithm" do
135
+ # The logstash config goes here.
136
+ # At this time, only filters are supported.
137
+ #config <<-CONFIG
138
+ #filter {
139
+ #anonymize {
140
+ #fields => ["clientip"]
141
+ #key => "longencryptionkey"
142
+ #algorithm => 'MD4'
143
+ #}
144
+ #}
145
+ #CONFIG
146
+ #
147
+ #sample("clientip" => "123.123.123.123") do
148
+ #insist { subject["clientip"] } == "0845cb571ab3646e51a07bcabf05e33d"
149
+ #end
150
+ #end
151
+
152
+ describe "anonymize string with MD5 alogrithm" do
153
+ # The logstash config goes here.
154
+ # At this time, only filters are supported.
155
+ config <<-CONFIG
156
+ filter {
157
+ anonymize {
158
+ fields => ["clientip"]
159
+ key => "longencryptionkey"
160
+ algorithm => 'MD5'
161
+ }
162
+ }
163
+ CONFIG
164
+
165
+ sample("clientip" => "123.123.123.123") do
166
+ insist { subject["clientip"] } == "9336c879e305c9604a3843fc3e75948f"
167
+ end
168
+ end
169
+
170
+ describe "Test field with multiple values" do
171
+ # The logstash config goes here.
172
+ # At this time, only filters are supported.
173
+ config <<-CONFIG
174
+ filter {
175
+ anonymize {
176
+ fields => ["clientip"]
177
+ key => "longencryptionkey"
178
+ algorithm => 'MD5'
179
+ }
180
+ }
181
+ CONFIG
182
+
183
+ sample("clientip" => [ "123.123.123.123", "223.223.223.223" ]) do
184
+ insist { subject["clientip"]} == [ "9336c879e305c9604a3843fc3e75948f", "7a6c66b8d3f42a7d650e3354af508df3" ]
185
+ end
186
+ end
187
+
188
+
189
+
190
+ end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-filter-anonymize
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: murmurhash3
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: jruby-openssl
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '='
52
+ - !ruby/object:Gem::Version
53
+ version: 0.9.4
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - '='
59
+ - !ruby/object:Gem::Version
60
+ version: 0.9.4
61
+ description: Anonymize fields using by replacing values with a consistent hash
62
+ email: richard.pijnenburg@elasticsearch.com
63
+ executables: []
64
+ extensions: []
65
+ extra_rdoc_files: []
66
+ files:
67
+ - .gitignore
68
+ - Gemfile
69
+ - Rakefile
70
+ - lib/logstash/filters/anonymize.rb
71
+ - logstash-filter-anonymize.gemspec
72
+ - rakelib/publish.rake
73
+ - rakelib/vendor.rake
74
+ - spec/filters/anonymize_spec.rb
75
+ homepage: http://logstash.net/
76
+ licenses:
77
+ - Apache License (2.0)
78
+ metadata:
79
+ logstash_plugin: 'true'
80
+ group: filter
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ! '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubyforge_project:
97
+ rubygems_version: 2.4.1
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: Anonymize fields using by replacing values with a consistent hash
101
+ test_files:
102
+ - spec/filters/anonymize_spec.rb