logstash-filter-anonymize 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MmY3Y2M2MTY4MWI0YWQ3ZjI2M2Q3Yjk2NDUxZTNhMjI0OTFjMDRhZA==
5
+ data.tar.gz: !binary |-
6
+ NWNiYjg3ZWZkZjQ2YjI5Nzc4ZTZkZTA4OTgyODZkN2ZhMWU0YzMxZA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ MmUzMWU5Nzk4YjllYzJkNWIzN2NjNmJhMzQ5Nzg5NjVhNzlmOGNlZDkzNmM4
10
+ NjZkODU3OGMyMGE2MmE5OTY1NGFjZTUzMWE5ZDhhNGVmN2ZkZTgyY2ZmMjAx
11
+ OGVlOWMyOWUwMWUyY2FjZjEyNjJmNGIzMzdkNTUwNzcyZTEwYTY=
12
+ data.tar.gz: !binary |-
13
+ ZTMzNjk0ZGJiYjIyMjk5ZWNiYWI5ZjRiOWNhYTU4ZWFiN2Y0NTA0YjI3Mjhh
14
+ NWQ1NzczZDk1ZmUxNjVhMzhkY2Q1MzQ5OTdhYmVmZDM1MDc5YjYwZjY1ZjJj
15
+ YTc0N2YxYWY4MDE5ZThiNmJiYjEwODIwNWZjZjA4ZWY3YjNkMzI=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over TLS so they cannot be tampered with in transit.
source 'https://rubygems.org'
gem 'rake'
gem 'gem_publisher'
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
# Descriptors of third-party files for the "vendor" task to fetch; this
# plugin does not need any.
@files = []

# Running plain `rake` lists the available tasks.
task(:default) { system("rake -T") }
6
+
@@ -0,0 +1,95 @@
1
+ # encoding: utf-8
2
+ require "logstash/filters/base"
3
+ require "logstash/namespace"
4
+
5
# Anonymize fields by replacing their values with a consistent hash.
class LogStash::Filters::Anonymize < LogStash::Filters::Base
  config_name "anonymize"
  milestone 1

  # The fields to be anonymized
  config :fields, :validate => :array, :required => true

  # Hashing key
  # When using MURMUR3 the key is ignored but must still be set.
  # When using IPV4_NETWORK the key is the subnet prefix length.
  config :key, :validate => :string, :required => true

  # digest/hash type
  config :algorithm, :validate => ['SHA1', 'SHA256', 'SHA384', 'SHA512', 'MD5', "MURMUR3", "IPV4_NETWORK"], :required => true, :default => 'SHA1'

  # Load the library the configured algorithm needs and bind #anonymize on
  # this instance to the matching implementation.
  public
  def register
    case @algorithm
    when "IPV4_NETWORK"
      require 'ipaddr'
      class << self; alias_method :anonymize, :anonymize_ipv4_network; end
    when "MURMUR3"
      require "murmurhash3"
      class << self; alias_method :anonymize, :anonymize_murmur3; end
    else
      require 'openssl'
      class << self; alias_method :anonymize, :anonymize_openssl; end
    end
  end # def register

  # Replace every configured field that is present on the event with its
  # anonymized value. Array values are anonymized element by element.
  public
  def filter(event)
    return unless filter?(event)
    @fields.each do |field|
      next unless event.include?(field)
      if event[field].is_a?(Array)
        event[field] = event[field].collect { |v| anonymize(v) }
      else
        event[field] = anonymize(event[field])
      end
    end
  end # def filter

  private
  # Mask an IP address string down to its network, using the configured key
  # (converted with to_i) as the prefix length.
  def anonymize_ipv4_network(ip_string)
    # in JRuby 1.7.11 outputs as US-ASCII
    IPAddr.new(ip_string).mask(@key.to_i).to_s.force_encoding(Encoding::UTF_8)
  end

  # Return the hex HMAC of the value, keyed with the configured key and using
  # the configured digest.
  def anonymize_openssl(data)
    digest = algorithm()
    # OpenSSL::HMAC.hexdigest only accepts String data; coerce so numeric or
    # boolean field values are hashed instead of raising TypeError.
    # in JRuby 1.7.11 outputs as ASCII-8BIT
    OpenSSL::HMAC.hexdigest(digest, @key, data.to_s).force_encoding(Encoding::UTF_8)
  end

  # Hash Fixnum and String values with 32-bit MurmurHash3. Values of any
  # other class fall through the case and yield nil.
  def anonymize_murmur3(value)
    case value
    when Fixnum
      MurmurHash3::V32.int_hash(value)
    when String
      MurmurHash3::V32.str_hash(value)
    end
  end

  # Build a fresh OpenSSL digest object for the configured algorithm.
  # Logs an error when the algorithm is not one of the supported digests.
  def algorithm

    case @algorithm
      #when 'SHA'
        #return OpenSSL::Digest::SHA.new
      when 'SHA1'
        return OpenSSL::Digest::SHA1.new
      #when 'SHA224'
        #return OpenSSL::Digest::SHA224.new
      when 'SHA256'
        return OpenSSL::Digest::SHA256.new
      when 'SHA384'
        return OpenSSL::Digest::SHA384.new
      when 'SHA512'
        return OpenSSL::Digest::SHA512.new
      #when 'MD4'
        #return OpenSSL::Digest::MD4.new
      when 'MD5'
        return OpenSSL::Digest::MD5.new
      else
        @logger.error("Unknown algorithm")
    end
  end

end # class LogStash::Filters::Anonymize
@@ -0,0 +1,29 @@
1
# Gem specification for the logstash-filter-anonymize plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-filter-anonymize'
  s.version         = '0.1.0'
  s.licenses        = ['Apache License (2.0)']
  s.summary         = "Anonymize fields using by replacing values with a consistent hash"
  s.description     = "Anonymize fields using by replacing values with a consistent hash"
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths = ["lib"]

  # Files: everything tracked by git plus anything in vendor/.
  # Split on newlines explicitly. `$\` is the *output* record separator and is
  # nil by default, so split($\) splits on any whitespace and breaks file
  # names that contain spaces.
  s.files = `git ls-files`.split("\n") + ::Dir.glob('vendor/*')

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

  s.add_runtime_dependency 'murmurhash3'
  s.add_runtime_dependency 'jruby-openssl', ['0.9.4']

end
29
+
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
desc "Publish gem to RubyGems.org"
task :publish_gem do |_task|
  # Look for the gemspec in the parent of this file's directory.
  spec_pattern = File.expand_path('../*.gemspec', File.dirname(__FILE__))
  gemspec_path = Dir.glob(spec_pattern).first
  published = GemPublisher.publish_if_updated(gemspec_path, :rubygems)
  puts "Published #{published}" if published
end
9
+
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
# Build a path underneath the "vendor" directory from the given components.
def vendor(*args)
  segments = ["vendor"] + args
  File.join(*segments)
end
8
+
9
# Directory task for "vendor/". It lists the "vendor" task as a prerequisite
# and creates the directory named by the task itself.
directory("vendor/" => ["vendor"]) { |dir_task, _args| mkdir dir_task.name }
12
+
13
# Download +url+ to +output+, then abort unless the SHA1 reported by the
# download equals the expected +sha1+.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  downloaded_sha1 = download(url, output)
  return if downloaded_sha1 == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{downloaded_sha1}')"
end # def fetch
22
+
23
# Make sure the file behind +url+ exists under vendor/ with the expected
# +sha1+, fetching it when the local copy is absent or has another checksum.
# The check runs as a task that depends on the "vendor/" directory task.
# Returns the local path.
def file_fetch(url, sha1)
  output = "vendor/#{File.basename(URI(url).path)}"
  checker = task output => [ "vendor/" ] do
    begin
      fetch(url, sha1, output) if file_sha1(output) != sha1
    rescue Errno::ENOENT
      # e.g. there is no local copy yet
      fetch(url, sha1, output)
    end
  end
  checker.invoke

  output
end
39
+
40
# Return the hex SHA1 digest of the file at +path+.
#
# Raises Errno::ENOENT when the file does not exist; file_fetch relies on
# that to detect a file that has not been downloaded yet.
def file_sha1(path)
  # Digest's own file reader streams the content in binary mode and closes
  # the handle for us, replacing the hand-rolled sysread loop.
  Digest::SHA1.file(path).hexdigest
end
54
+
55
# Stream +url+ into +output+ and return the hex SHA1 of the bytes received.
#
# The body is written to "<output>.tmp" first and renamed into place once the
# transfer has finished, so a partial download never appears at +output+.
# Fails when the server answers with a status other than 200 or 301.
# Redirects are not followed here.
def download(url, output)
  uri = URI(url)
  digest = Digest::SHA1.new
  tmp = "#{output}.tmp"
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string and is "/" for an empty path
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # Net::HTTPResponse#code is a String, so compare against strings, and
      # fail when the status is NOT one of the accepted ones.
      fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
      # 0.0 when the header is absent, which disables the progress display
      size = response["content-length"].to_i.to_f
      count = 0
      # binary mode: the payload must be written exactly as received
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r      \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  # Remove a leftover partial download; after a successful rename there is
  # nothing to delete.
  File.unlink(tmp) if File.exist?(tmp)
end # def download
88
+
89
# Extract entries from the gzipped +tarball+.
#
# The block is called with every tar entry and returns the path the entry
# should be written to, or nil to skip it. Regular files that already exist
# with the same size and permission bits are left alone. The tarball is
# deleted once every entry has been processed.
def untar(tarball, &block)
  require "archive/tar/minitar"
  require "zlib"
  tgz = Zlib::GzipReader.new(File.open(tarball, "rb"))
  tar = Archive::Tar::Minitar::Input.open(tgz)
  tar.each do |entry|
    path = block.call(entry)
    next if path.nil?
    parent = File.dirname(path)

    mkdir_p parent unless File.directory?(parent)

    if entry.directory?
      mkdir path unless File.directory?(path)
    else
      entry_mode = entry.instance_eval { @mode } & 0777
      # File.exists? is deprecated and gone in Ruby 3.2; exist? works everywhere
      if File.exist?(path)
        stat = File.stat(path)
        # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
        # expose headers in the entry.
        entry_size = entry.instance_eval { @size }
        # Skip this file if the existing one has the same size and mode.
        next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
      end
      puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
      # binary mode: archive members must be written unmodified
      File.open(path, "wb") do |fd|
        # eof? check lets us skip empty files. Necessary because the API provided by
        # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
        # IO object. Something about empty files in this EntryStream causes
        # IO.copy_stream to throw "can't convert nil into String" on JRuby
        # TODO(sissel): File a bug about this.
        while !entry.eof?
          chunk = entry.read(16384)
          fd.write(chunk)
        end
          #IO.copy_stream(entry, fd)
      end
      File.chmod(entry_mode, path)
    end
  end
  tar.close
  File.unlink(tarball) if File.file?(tarball)
end # def untar
133
+
134
# Decompress the gzip file at +file+ next to itself (same path without the
# trailing ".gz") and delete the compressed original.
#
# Raises ArgumentError when +file+ does not end in ".gz", because the output
# path would then be the input itself. If decompression fails, the partial
# output is removed and the error is re-raised.
def ungz(file)
  require "zlib"

  # Strip only the trailing extension; a ".gz" elsewhere in the path (for
  # example in a directory name) must stay untouched.
  outpath = file.sub(/\.gz\z/, '')
  raise ArgumentError, "#{file} does not end in .gz" if outpath == file

  begin
    # The block form closes the reader even when copying fails, and before
    # the compressed file is unlinked.
    Zlib::GzipReader.open(file) do |gz|
      File.open(outpath, "wb") do |out|
        IO::copy_stream(gz, out)
      end
    end
    File.unlink(file)
  rescue
    File.unlink(outpath) if File.file?(outpath)
    raise
  end
end
149
+
150
desc "Process any vendor files required for this plugin"
task "vendor" do |task, args|

  # Each entry of @files is a hash with a 'url', its expected 'sha1' and,
  # optionally, a 'files' list naming the archive members to extract.
  @files.each do |file|
    archive = file_fetch(file['url'], file['sha1'])
    # Match on the real extension; the former /.tar.gz/ and /.gz/ patterns had
    # unescaped dots and no anchor, so they matched names such as "x.tgz".
    if archive.end_with?('.tar.gz')
      prefix = archive.gsub('.tar.gz', '').gsub('vendor/', '')
      untar(archive) do |entry|
        if file['files'].nil?
          # No filter given. Previously `out` stayed nil here and File.join
          # raised TypeError.
          # NOTE(review): keeping the archive-relative path is an assumption
          # about the intended layout - confirm.
          out = entry.full_name
        else
          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
          out = entry.full_name.split("/").last
        end
        File.join('vendor', out)
      end
    elsif archive.end_with?('.gz')
      ungz(archive)
    end
  end

end
@@ -0,0 +1,190 @@
1
+ # encoding: utf-8
2
+
3
+ require "spec_helper"
4
+ require "logstash/filters/anonymize"
5
+
6
describe LogStash::Filters::Anonymize do

  describe "anonymize ipaddress with IPV4_NETWORK algorithm" do
    config <<-CONFIG
      filter {
        anonymize {
          fields => ["clientip"]
          algorithm => "IPV4_NETWORK"
          key => 24
        }
      }
    CONFIG

    sample("clientip" => "233.255.13.44") do
      insist { subject["clientip"] } == "233.255.13.0"
    end
  end

  describe "anonymize string with MURMUR3 algorithm" do
    config <<-CONFIG
      filter {
        anonymize {
          fields => ["clientip"]
          algorithm => "MURMUR3"
          key => ""
        }
      }
    CONFIG

    sample("clientip" => "123.52.122.33") do
      insist { subject["clientip"] } == 1541804874
    end
  end

  # Expected hex HMAC of "123.123.123.123" keyed with "longencryptionkey",
  # per digest algorithm.
  hmac_digests = {
    'SHA1'   => "fdc60acc4773dc5ac569ffb78fcb93c9630797f4",
    # HMAC-SHA224 isn't implemented in JRuby OpenSSL
    #'SHA224' => "5744bbcc4f64acb6a805b7fee3013a8958cc8782d3fb0fb318cec915",
    'SHA256' => "345bec3eff242d53b568916c2610b3e393d885d6b96d643f38494fd74bf4a9ca",
    'SHA384' => "22d4c0e8c4fbcdc4887d2038fca7650f0e2e0e2457ff41c06eb2a980dded6749561c814fe182aff93e2538d18593947a",
    'SHA512' => "11c19b326936c08d6c50a3c847d883e5a1362e6a64dd55201a25f2c1ac1b673f7d8bf15b8f112a4978276d573275e3b14166e17246f670c2a539401c5bfdace8",
    # HMAC-MD4 isn't implemented in JRuby OpenSSL
    #'MD4'    => "0845cb571ab3646e51a07bcabf05e33d",
    'MD5'    => "9336c879e305c9604a3843fc3e75948f",
  }

  # One example group per digest; only the algorithm name and the expected
  # value differ between them.
  hmac_digests.each do |algorithm, expected|
    describe "anonymize string with #{algorithm} algorithm" do
      config <<-CONFIG
        filter {
          anonymize {
            fields => ["clientip"]
            key => "longencryptionkey"
            algorithm => '#{algorithm}'
          }
        }
      CONFIG

      sample("clientip" => "123.123.123.123") do
        insist { subject["clientip"] } == expected
      end
    end
  end

  describe "Test field with multiple values" do
    config <<-CONFIG
      filter {
        anonymize {
          fields => ["clientip"]
          key => "longencryptionkey"
          algorithm => 'MD5'
        }
      }
    CONFIG

    sample("clientip" => [ "123.123.123.123", "223.223.223.223" ]) do
      insist { subject["clientip"]} == [ "9336c879e305c9604a3843fc3e75948f", "7a6c66b8d3f42a7d650e3354af508df3" ]
    end
  end

end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-filter-anonymize
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: murmurhash3
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: jruby-openssl
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '='
52
+ - !ruby/object:Gem::Version
53
+ version: 0.9.4
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - '='
59
+ - !ruby/object:Gem::Version
60
+ version: 0.9.4
61
+ description: Anonymize fields using by replacing values with a consistent hash
62
+ email: richard.pijnenburg@elasticsearch.com
63
+ executables: []
64
+ extensions: []
65
+ extra_rdoc_files: []
66
+ files:
67
+ - .gitignore
68
+ - Gemfile
69
+ - Rakefile
70
+ - lib/logstash/filters/anonymize.rb
71
+ - logstash-filter-anonymize.gemspec
72
+ - rakelib/publish.rake
73
+ - rakelib/vendor.rake
74
+ - spec/filters/anonymize_spec.rb
75
+ homepage: http://logstash.net/
76
+ licenses:
77
+ - Apache License (2.0)
78
+ metadata:
79
+ logstash_plugin: 'true'
80
+ group: filter
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ! '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubyforge_project:
97
+ rubygems_version: 2.4.1
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: Anonymize fields using by replacing values with a consistent hash
101
+ test_files:
102
+ - spec/filters/anonymize_spec.rb