fluent-plugin-anonymizer 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE +14 -0
- data/README.md +100 -0
- data/Rakefile +9 -0
- data/fluent-plugin-anonymizer.gemspec +22 -0
- data/lib/fluent/plugin/out_anonymizer.rb +79 -0
- data/test/helper.rb +28 -0
- data/test/plugin/test_out_anonymizer.rb +90 -0
- metadata +107 -0
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
Copyright (c) 2013- Kentaro Yoshida
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
14
|
+
|
data/README.md
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# fluent-plugin-anonymizer [Build Status](https://travis-ci.org/y-ken/fluent-plugin-anonymizer)
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
|
5
|
+
Fluentd filter output plugin to anonymize records. This data masking plugin protects privacy data such as IP address, ID, email, phone number and so on.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
`````
|
10
|
+
### native gem
|
11
|
+
gem install fluent-plugin-anonymizer
|
12
|
+
|
13
|
+
### td-agent gem
|
14
|
+
/usr/lib64/fluent/ruby/bin/fluent-gem install fluent-plugin-anonymizer
|
15
|
+
`````
|
16
|
+
|
17
|
+
## Tutorial
|
18
|
+
|
19
|
+
#### configuration
|
20
|
+
|
21
|
+
This sample hashes the `user_id`, `member_id` and `mail` fields with SHA-1. The IP address in `host` is masked with a 24-bit netmask using the `ipv4_mask_keys` and `ipv4_mask_subnet` options.
|
22
|
+
|
23
|
+
`````
|
24
|
+
<source>
|
25
|
+
type forward
|
26
|
+
port 24224
|
27
|
+
</source>
|
28
|
+
|
29
|
+
<match test.message>
|
30
|
+
type anonymizer
|
31
|
+
sha1_keys user_id, member_id, mail
|
32
|
+
ipv4_mask_keys host
|
33
|
+
ipv4_mask_subnet 24
|
34
|
+
remove_tag_prefix test.
|
35
|
+
add_tag_prefix anonymized.
|
36
|
+
</match>
|
37
|
+
|
38
|
+
<match anonymized.message>
|
39
|
+
type stdout
|
40
|
+
</match>
|
41
|
+
`````
|
42
|
+
|
43
|
+
#### result
|
44
|
+
|
45
|
+
`````
|
46
|
+
$ echo '{"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
|
47
|
+
|
48
|
+
$ tail -f /var/log/td-agent/td-agent.log
|
49
|
+
2013-11-19 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
|
50
|
+
`````
|
51
|
+
|
52
|
+
### Params
|
53
|
+
|
54
|
+
* `md5_keys` `sha1_keys` `sha256_keys` `sha384_keys` `sha512_keys`
|
55
|
+
|
56
|
+
Specify which hash algorithm is used for each of the listed keys (one or more, comma-separated).
|
57
|
+
|
58
|
+
* `hash_salt` (default: none)
|
59
|
+
|
60
|
+
This salt applies to the `md5_keys` `sha1_keys` `sha256_keys` `sha384_keys` `sha512_keys` settings.
|
61
|
+
It is recommended to set a hash salt to prevent rainbow table attacks.
|
62
|
+
|
63
|
+
|
64
|
+
* `ipv4_mask_keys`
|
65
|
+
* `ipv4_mask_subnet` (default: 24)
|
66
|
+
|
67
|
+
Mask the IPv4 address held in each of the listed keys down to the given subnet length. This makes it easier to aggregate records by network.
|
68
|
+
|
69
|
+
| ipv4_mask_subnet | input | output |
|
70
|
+
|------------------|-----------------|---------------|
|
71
|
+
| 24 | 192.168.200.100 | 192.168.200.0 |
|
72
|
+
| 16 | 192.168.200.100 | 192.168.0.0 |
|
73
|
+
| 8 | 192.168.200.100 | 192.0.0.0 |
|
74
|
+
|
75
|
+
* include_tag_key (default: false)
|
76
|
+
|
77
|
+
Add original tag name into filtered record using SetTagKeyMixin function.
|
78
|
+
|
79
|
+
* remove_tag_prefix
|
80
|
+
* remove_tag_suffix
|
81
|
+
* add_tag_prefix
|
82
|
+
* add_tag_suffix
|
83
|
+
|
84
|
+
Edit tag format using HandleTagNameMixin function.
|
85
|
+
|
86
|
+
## Blog Articles
|
87
|
+
|
88
|
+
* http://y-ken.hatenablog.com/entry/fluent-plugin-anonymizer-has-released
|
89
|
+
|
90
|
+
## TODO
|
91
|
+
|
92
|
+
Pull requests are very welcome!!
|
93
|
+
|
94
|
+
## Copyright
|
95
|
+
|
96
|
+
Copyright © 2013- Kentaro Yoshida ([@yoshi_ken](https://twitter.com/yoshi_ken))
|
97
|
+
|
98
|
+
## License
|
99
|
+
|
100
|
+
Apache License, Version 2.0
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for fluent-plugin-anonymizer.

# Put this gem's lib/ directory on the load path while the spec is evaluated.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name          = "fluent-plugin-anonymizer"
  gem.version       = "0.0.1"
  gem.authors       = ["Kentaro Yoshida"]
  gem.email         = ["y.ken.studio@gmail.com"]
  gem.summary       = "Fluentd filter output plugin to anonymize records. This data masking plugin protects privacy data such as IP address, ID, email, phone number and so on."
  gem.homepage      = "https://github.com/y-ken/fluent-plugin-anonymizer"
  gem.license       = "Apache License, Version 2.0"

  # Package every file tracked by git; executables and test files are picked
  # out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Dependencies (declaration order is kept as in the original spec).
  gem.add_development_dependency "bundler"
  gem.add_development_dependency "rake"
  gem.add_runtime_dependency "fluentd"
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# Fluentd output plugin that anonymizes selected fields of each record and
# re-emits the record under a rewritten tag.
#
# Fields named in `md5_keys`, `sha1_keys`, `sha256_keys`, `sha384_keys` or
# `sha512_keys` are replaced by the hex digest of `hash_salt + value`.
# Fields named in `ipv4_mask_keys` are passed through IPAddr#mask with
# `ipv4_mask_subnet`.
class Fluent::AnonymizerOutput < Fluent::Output
  Fluent::Plugin.register_output('anonymizer', self)

  # Names accepted in front of the `_keys` suffix in the configuration.
  HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512 ipv4_mask)
  config_param :hash_salt, :string, :default => ''
  config_param :ipv4_mask_subnet, :integer, :default => 24

  include Fluent::HandleTagNameMixin

  include Fluent::SetTagKeyMixin
  config_set_default :include_tag_key, false

  # The digest classes are wrapped in procs so the constants are looked up
  # only when called, i.e. after #initialize has required the digest library.
  DIGEST = {
    "md5" => Proc.new { Digest::MD5 },
    "sha1" => Proc.new { Digest::SHA1 },
    "sha256" => Proc.new { Digest::SHA256 },
    "sha384" => Proc.new { Digest::SHA384 },
    "sha512" => Proc.new { Digest::SHA512 }
  }

  # Loads the libraries used by #anonymize before the plugin is set up.
  def initialize
    require 'digest/sha2'
    require 'ipaddr'
    super
  end

  # Builds @hash_keys, a map of record key => algorithm name, from every
  # `<algorithm>_keys` entry of the configuration.
  #
  # Raises Fluent::ConfigError when an unsupported `*_keys` entry is present,
  # when no key is configured at all, or when none of the tag rewriting
  # options is set.
  def configure(conf)
    super

    @hash_keys = Hash.new
    conf.keys.select { |k| k =~ /_keys$/ }.each do |key|
      hash_algorithm_name = key.sub('_keys', '')
      raise Fluent::ConfigError, "anonymizer: unsupported key #{hash_algorithm_name}" unless HASH_ALGORITHM.include?(hash_algorithm_name)
      # The value is a comma separated list; spaces are dropped first.
      conf[key].gsub(' ', '').split(',').each do |record_key|
        @hash_keys.store(record_key, hash_algorithm_name)
      end
    end

    if @hash_keys.empty?
      raise Fluent::ConfigError, "anonymizer: missing hash keys setting."
    end

    if !@remove_tag_prefix && !@remove_tag_suffix && !@add_tag_prefix && !@add_tag_suffix
      raise Fluent::ConfigError, "anonymizer: missing remove_tag_prefix, remove_tag_suffix, add_tag_prefix or add_tag_suffix."
    end
  end

  # Anonymizes every record of the event stream and re-emits it with the
  # rewritten tag.
  def emit(tag, es, chain)
    es.each do |time, record|
      record = filter_anonymize_record(record)
      # filter_record rewrites the tag string it receives in place, so it is
      # given a fresh copy for every record. Reusing `tag` would apply the
      # prefix/suffix edits again for each further record of the same stream
      # and would also modify the caller's tag.
      emit_tag = tag.dup
      filter_record(emit_tag, time, record)
      Fluent::Engine.emit(emit_tag, time, record)
    end
    chain.next
  end

  # Replaces the value of every configured key present in `record` with its
  # anonymized form; array values are anonymized element by element.
  # The record is modified in place and returned.
  def filter_anonymize_record(record)
    @hash_keys.each do |hash_key, hash_algorithm|
      next unless record.key?(hash_key)
      value = record[hash_key]
      record[hash_key] =
        if value.is_a?(Array)
          value.map { |v| anonymize(v, hash_algorithm, @hash_salt) }
        else
          anonymize(value, hash_algorithm, @hash_salt)
        end
    end
    record
  end

  # Returns the anonymized form of `message` for the given algorithm name:
  # the hex digest of `salt + message.to_s` for the digest algorithms, or the
  # address masked with @ipv4_mask_subnet for 'ipv4_mask'.
  def anonymize(message, algorithm, salt)
    case algorithm
    when 'md5', 'sha1', 'sha256', 'sha384', 'sha512'
      DIGEST[algorithm].call.hexdigest(salt + message.to_s)
    when 'ipv4_mask'
      IPAddr.new(message).mask(@ipv4_mask_subnet).to_s
    else
      # Not reachable through #configure, which rejects unsupported names.
      $log.warn "anonymizer: unknown algorithm #{algorithm} has called."
    end
  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'rubygems'
require 'bundler'

# Activate the bundled gems. When Bundler reports a problem, print it with a
# hint and exit with Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => error
  $stderr.puts error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit error.status_code
end
require 'test/unit'

# Make both the plugin's lib/ directory and this test directory requirable.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
require 'fluent/test'

# Unless VERBOSE is set, replace the global logger with an object that
# accepts any method call and does nothing.
unless ENV.key?('VERBOSE')
  nulllogger = Object.new
  def nulllogger.method_missing(method, *args)
    # pass
  end
  $log = nulllogger
end

require 'fluent/plugin/out_anonymizer'

class Test::Unit::TestCase
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
# Tests for Fluent::AnonymizerOutput: configuration validation, digest and
# IPv4 mask output, and tag rewriting.
class AnonymizerOutputTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
  end

  # One key per supported digest, a salt, and a /24 IPv4 mask.
  CONFIG = %[
    md5_keys          data_for_md5
    sha1_keys         data_for_sha1
    sha256_keys       data_for_sha256
    sha384_keys       data_for_sha384
    sha512_keys       data_for_sha512
    hash_salt         test_salt_string
    ipv4_mask_keys    host
    ipv4_mask_subnet  24
    remove_tag_prefix input.
    add_tag_prefix    anonymized.
  ]

  # Several keys on one `sha1_keys` line, no salt, and a /16 IPv4 mask.
  CONFIG_MULTI_KEYS = %[
    sha1_keys         member_id, mail, telephone
    ipv4_mask_keys    host
    ipv4_mask_subnet  16
    remove_tag_prefix input.
    add_tag_prefix    anonymized.
  ]

  # Returns an output test driver for the plugin, configured with `conf`.
  def create_driver(conf=CONFIG,tag='test')
    Fluent::Test::OutputTestDriver.new(Fluent::AnonymizerOutput, tag).configure(conf)
  end

  # An empty configuration and an unsupported `*_keys` entry must both be
  # rejected; a valid configuration exposes the configured salt.
  def test_configure
    assert_raise(Fluent::ConfigError) { create_driver('') }
    assert_raise(Fluent::ConfigError) { create_driver('unknown_keys') }
    d = create_driver(CONFIG)
    assert_equal 'test_salt_string', d.instance.config['hash_salt']
  end

  # Every digest algorithm with a salt, plus the /24 mask and the tag rewrite.
  def test_emit
    d1 = create_driver(CONFIG, 'input.access')
    d1.run do
      d1.emit({
        'host' => '10.102.3.80',
        'data_for_md5' => '12345',
        'data_for_sha1' => '12345',
        'data_for_sha256' => '12345',
        'data_for_sha384' => '12345',
        'data_for_sha512' => '12345'
      })
    end
    emits = d1.emits
    assert_equal 1, emits.length
    assert_equal 'anonymized.access', emits[0][0] # tag
    assert_equal '10.102.3.0', emits[0][2]['host']
    assert_equal '9138bd41172f5485f7b6eee3afcd0d62', emits[0][2]['data_for_md5']
    assert_equal 'ee98db51658d38580b1cf788db19ad06e51a32f7', emits[0][2]['data_for_sha1']
    assert_equal 'd53d15615b19597b0f95a984a132ed5164ba9676bf3cb28e018d28feaa2ea6fd', emits[0][2]['data_for_sha256']
    assert_equal '6e9cd6d84ea371a72148b418f1a8cb2534da114bc2186d36ec6f14fd5c237b6f2e460f409dda89b7e42a14b7da8a8131', emits[0][2]['data_for_sha384']
    assert_equal 'adcf4e5d1e52f57f67d8b0cd85051158d7362103d7ed4cb6302445c2708eff4b17cb309cf5d09fd5cf76615c75652bd29d1707ce689a28e8700afd7a7439ef20', emits[0][2]['data_for_sha512']
  end

  # Several keys under one algorithm; keys that are not configured
  # ('action') must pass through unchanged.
  def test_emit_multi_keys
    d1 = create_driver(CONFIG_MULTI_KEYS, 'input.access')
    d1.run do
      d1.emit({
        'host' => '10.102.3.80',
        'member_id' => '12345',
        'mail' => 'example@example.com',
        'telephone' => '00-0000-0000',
        'action' => 'signup'
      })
    end
    emits = d1.emits
    assert_equal 1, emits.length
    assert_equal 'anonymized.access', emits[0][0] # tag
    assert_equal '10.102.0.0', emits[0][2]['host']
    assert_equal '8cb2237d0679ca88db6464eac60da96345513964', emits[0][2]['member_id']
    assert_equal '914fec35ce8bfa1a067581032f26b053591ee38a', emits[0][2]['mail']
    assert_equal 'ce164718b94212332187eb8420903b46b334d609', emits[0][2]['telephone']
    assert_equal 'signup', emits[0][2]['action']
  end
end
|
metadata
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fluent-plugin-anonymizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Kentaro Yoshida
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-11-19 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: fluentd
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
description:
|
63
|
+
email:
|
64
|
+
- y.ken.studio@gmail.com
|
65
|
+
executables: []
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
69
|
+
- .gitignore
|
70
|
+
- .travis.yml
|
71
|
+
- Gemfile
|
72
|
+
- LICENSE
|
73
|
+
- README.md
|
74
|
+
- Rakefile
|
75
|
+
- fluent-plugin-anonymizer.gemspec
|
76
|
+
- lib/fluent/plugin/out_anonymizer.rb
|
77
|
+
- test/helper.rb
|
78
|
+
- test/plugin/test_out_anonymizer.rb
|
79
|
+
homepage: https://github.com/y-ken/fluent-plugin-anonymizer
|
80
|
+
licenses:
|
81
|
+
- Apache License, Version 2.0
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ! '>='
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
|
+
none: false
|
94
|
+
requirements:
|
95
|
+
- - ! '>='
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
requirements: []
|
99
|
+
rubyforge_project:
|
100
|
+
rubygems_version: 1.8.23
|
101
|
+
signing_key:
|
102
|
+
specification_version: 3
|
103
|
+
summary: Fluentd filter output plugin to anonymize records. This data masking plugin
|
104
|
+
protects privacy data such as IP address, ID, email, phone number and so on.
|
105
|
+
test_files:
|
106
|
+
- test/helper.rb
|
107
|
+
- test/plugin/test_out_anonymizer.rb
|