fluent-plugin-anonymizer 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +1 -0
- data/README.md +21 -5
- data/fluent-plugin-anonymizer.gemspec +2 -2
- data/lib/fluent/plugin/out_anonymizer.rb +24 -11
- data/test/plugin/test_out_anonymizer.rb +67 -36
- metadata +5 -5
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -2,10 +2,12 @@
|
|
2
2
|
|
3
3
|
## Overview
|
4
4
|
|
5
|
-
Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as
|
5
|
+
Fluentd filter output plugin to anonymize records with [OpenSSL::HMAC](http://docs.ruby-lang.org/ja/1.9.3/class/OpenSSL=3a=3aHMAC.html) of MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as UserID, Email, Phone number, IPv4/IPv6 address and so on.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
9
|
+
install with gem or fluent-gem command as:
|
10
|
+
|
9
11
|
`````
|
10
12
|
### native gem
|
11
13
|
gem install fluent-plugin-anonymizer
|
@@ -28,10 +30,19 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
|
|
28
30
|
|
29
31
|
<match test.message>
|
30
32
|
type anonymizer
|
33
|
+
|
34
|
+
# Specify the keys to hash, separated by commas
|
31
35
|
sha1_keys user_id, member_id, mail
|
36
|
+
|
37
|
+
# Set the hash salt to any string for better security
|
38
|
+
hash_salt mysaltstring
|
39
|
+
|
40
|
+
# Specify the IP address keys to round, separated by commas, and the subnet masks
|
32
41
|
ipaddr_mask_keys host
|
33
42
|
ipv4_mask_subnet 24
|
34
43
|
ipv6_mask_subnet 104
|
44
|
+
|
45
|
+
# Set tag rename pattern
|
35
46
|
remove_tag_prefix test.
|
36
47
|
add_tag_prefix anonymized.
|
37
48
|
</match>
|
@@ -48,8 +59,8 @@ $ echo '{"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"}
|
|
48
59
|
$ echo '{"host":"2001:db8:0:8d3:0:8a2e:70:7344","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
|
49
60
|
|
50
61
|
$ tail -f /var/log/td-agent/td-agent.log
|
51
|
-
2014-01-06 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"
|
52
|
-
2014-01-06 18:30:22 +0900 anonymized.message: {"host":"2001:db8:0:8d3:0:8a2e::","member_id":"
|
62
|
+
2014-01-06 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"61f6c1b5f19e0a7f73dd52a23534085bf01f2c67","mail":"eeb890d74b8c1c4cd1e35a3ea62166e0b770f4f4"}
|
63
|
+
2014-01-06 18:30:22 +0900 anonymized.message: {"host":"2001:db8:0:8d3:0:8a2e::","member_id":"61f6c1b5f19e0a7f73dd52a23534085bf01f2c67","mail":"eeb890d74b8c1c4cd1e35a3ea62166e0b770f4f4"}
|
53
64
|
`````
|
54
65
|
|
55
66
|
## Parameters
|
@@ -88,13 +99,18 @@ Add original tag name into filtered record using SetTagKeyMixin.
|
|
88
99
|
|
89
100
|
One or more of these options must be set to edit the tag name using HandleTagNameMixin.
|
90
101
|
|
102
|
+
* tag
|
103
|
+
|
104
|
+
When this option is used [like 'tag anonymized.${tag}' with tag placeholder](https://github.com/y-ken/fluent-plugin-anonymizer/blob/master/test/plugin/test_out_anonymizer.rb#L153), the tag is overwritten after the following options have been applied: remove_tag_prefix, remove_tag_suffix, add_tag_prefix and add_tag_suffix.
|
105
|
+
|
91
106
|
## Notes
|
92
107
|
|
93
|
-
* hashing nested value behavior is compatible with [LogStash::Filters::Anonymize](https://github.com/logstash/logstash/blob/master/lib/logstash/filters/anonymize.rb) does. For further details, please check it out the test code at [test_emit_nest_value](https://github.com/y-ken/fluent-plugin-anonymizer/blob/master/test/plugin/test_out_anonymizer.rb#
|
108
|
+
* The behavior for hashing nested values is compatible with [LogStash::Filters::Anonymize](https://github.com/logstash/logstash/blob/master/lib/logstash/filters/anonymize.rb). For further details, please check out the test code at [test_emit_nest_value](https://github.com/y-ken/fluent-plugin-anonymizer/blob/master/test/plugin/test_out_anonymizer.rb#L91).
|
94
109
|
|
95
110
|
## Blog Articles
|
96
111
|
|
97
|
-
*
|
112
|
+
* 個人情報を難読化するfluent-plugin-anonymizerをリリースしました #fluentd - Y-Ken Studio
|
113
|
+
http://y-ken.hatenablog.com/entry/fluent-plugin-anonymizer-has-released
|
98
114
|
|
99
115
|
## TODO
|
100
116
|
|
@@ -4,10 +4,10 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-anonymizer"
|
7
|
-
spec.version = "0.
|
7
|
+
spec.version = "0.2.0"
|
8
8
|
spec.authors = ["Kentaro Yoshida"]
|
9
9
|
spec.email = ["y.ken.studio@gmail.com"]
|
10
|
-
spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as
|
10
|
+
spec.summary = %q{Fluentd filter output plugin to anonymize records with HMAC of MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as UserID, Email, Phone number, IPv4/IPv6 address and so on.}
|
11
11
|
spec.homepage = "https://github.com/y-ken/fluent-plugin-anonymizer"
|
12
12
|
spec.license = "Apache License, Version 2.0"
|
13
13
|
|
@@ -2,6 +2,7 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
2
2
|
Fluent::Plugin.register_output('anonymizer', self)
|
3
3
|
|
4
4
|
HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512 ipaddr_mask)
|
5
|
+
config_param :tag, :string, :default => nil
|
5
6
|
config_param :hash_salt, :string, :default => ''
|
6
7
|
config_param :ipv4_mask_subnet, :integer, :default => 24
|
7
8
|
config_param :ipv6_mask_subnet, :integer, :default => 104
|
@@ -12,15 +13,15 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
12
13
|
config_set_default :include_tag_key, false
|
13
14
|
|
14
15
|
DIGEST = {
|
15
|
-
"md5" => Proc.new { Digest
|
16
|
-
"sha1" => Proc.new { Digest
|
17
|
-
"sha256" => Proc.new { Digest
|
18
|
-
"sha384" => Proc.new { Digest
|
19
|
-
"sha512" => Proc.new { Digest
|
16
|
+
"md5" => Proc.new { OpenSSL::Digest.new('md5') },
|
17
|
+
"sha1" => Proc.new { OpenSSL::Digest.new('sha1') },
|
18
|
+
"sha256" => Proc.new { OpenSSL::Digest.new('sha256') },
|
19
|
+
"sha384" => Proc.new { OpenSSL::Digest.new('sha384') },
|
20
|
+
"sha512" => Proc.new { OpenSSL::Digest.new('sha512') }
|
20
21
|
}
|
21
22
|
|
22
23
|
def initialize
|
23
|
-
require '
|
24
|
+
require 'openssl'
|
24
25
|
require 'ipaddr'
|
25
26
|
super
|
26
27
|
end
|
@@ -42,7 +43,7 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
42
43
|
end
|
43
44
|
$log.info "anonymizer: adding anonymize rules for each field. #{@hash_keys}"
|
44
45
|
|
45
|
-
if ( !@remove_tag_prefix && !@remove_tag_suffix && !@add_tag_prefix && !@add_tag_suffix )
|
46
|
+
if ( !@tag && !@remove_tag_prefix && !@remove_tag_suffix && !@add_tag_prefix && !@add_tag_suffix )
|
46
47
|
raise Fluent::ConfigError, "anonymizer: missing remove_tag_prefix, remove_tag_suffix, add_tag_prefix or add_tag_suffix."
|
47
48
|
end
|
48
49
|
end
|
@@ -53,13 +54,25 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
53
54
|
next unless record.include?(hash_key)
|
54
55
|
record[hash_key] = filter_anonymize_record(record[hash_key], hash_algorithm)
|
55
56
|
end
|
56
|
-
|
57
|
-
filter_record(
|
58
|
-
|
57
|
+
emit_tag = tag.dup
|
58
|
+
filter_record(emit_tag, time, record)
|
59
|
+
emit_tag = rewrite_tag(@tag, emit_tag) if @tag
|
60
|
+
Fluent::Engine.emit(emit_tag, time, record)
|
59
61
|
end
|
60
62
|
chain.next
|
61
63
|
end
|
62
64
|
|
65
|
+
def rewrite_tag(rewritetag, tag)
|
66
|
+
placeholder = {
|
67
|
+
'${tag}' => tag,
|
68
|
+
'__TAG__' => tag
|
69
|
+
}
|
70
|
+
return rewritetag.gsub(/(\${[a-z_]+(\[[0-9]+\])?}|__[A-Z_]+__)/) do
|
71
|
+
$log.warn "anonymizer: unknown placeholder found. :placeholder=>#{$1} :tag=>#{tag} :rewritetag=>#{rewritetag}" unless placeholder.include?($1)
|
72
|
+
placeholder[$1]
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
63
76
|
def filter_anonymize_record(data, hash_algorithm)
|
64
77
|
begin
|
65
78
|
if data.is_a?(Array)
|
@@ -77,7 +90,7 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
77
90
|
def anonymize(message, algorithm, salt)
|
78
91
|
case algorithm
|
79
92
|
when 'md5','sha1','sha256','sha384','sha512'
|
80
|
-
DIGEST[algorithm].call
|
93
|
+
OpenSSL::HMAC.hexdigest(DIGEST[algorithm].call, salt, message.to_s)
|
81
94
|
when 'ipaddr_mask'
|
82
95
|
address = IPAddr.new(message)
|
83
96
|
subnet = address.ipv4? ? @ipv4_mask_subnet : @ipv6_mask_subnet
|
@@ -18,29 +18,6 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
18
18
|
add_tag_prefix anonymized.
|
19
19
|
]
|
20
20
|
|
21
|
-
CONFIG_MULTI_KEYS = %[
|
22
|
-
sha1_keys member_id, mail, telephone
|
23
|
-
ipaddr_mask_keys host
|
24
|
-
ipv4_mask_subnet 16
|
25
|
-
remove_tag_prefix input.
|
26
|
-
add_tag_prefix anonymized.
|
27
|
-
]
|
28
|
-
|
29
|
-
CONFIG_NEST_VALUE = %[
|
30
|
-
sha1_keys array,hash
|
31
|
-
ipaddr_mask_keys host
|
32
|
-
remove_tag_prefix input.
|
33
|
-
add_tag_prefix anonymized.
|
34
|
-
]
|
35
|
-
|
36
|
-
CONFIG_IPV6 = %[
|
37
|
-
ipaddr_mask_keys host
|
38
|
-
ipv4_mask_subnet 24
|
39
|
-
ipv6_mask_subnet 104
|
40
|
-
remove_tag_prefix input.
|
41
|
-
add_tag_prefix anonymized.
|
42
|
-
]
|
43
|
-
|
44
21
|
def create_driver(conf=CONFIG,tag='test')
|
45
22
|
Fluent::Test::OutputTestDriver.new(Fluent::AnonymizerOutput, tag).configure(conf)
|
46
23
|
end
|
@@ -74,18 +51,25 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
74
51
|
p emits[0]
|
75
52
|
assert_equal 'anonymized.access', emits[0][0] # tag
|
76
53
|
assert_equal '10.102.3.0', emits[0][2]['host']
|
77
|
-
assert_equal '
|
78
|
-
assert_equal '
|
79
|
-
assert_equal '
|
80
|
-
assert_equal '
|
81
|
-
assert_equal '
|
54
|
+
assert_equal 'e738cbde82a514dc60582cd467c240ed', emits[0][2]['data_for_md5']
|
55
|
+
assert_equal '69cf099459c06b852ede96d39b710027727d13c6', emits[0][2]['data_for_sha1']
|
56
|
+
assert_equal '804d83b8c6a3e01498d40677652b084333196d8e548ee5a8710fbd0e1e115527', emits[0][2]['data_for_sha256']
|
57
|
+
assert_equal '6c90c389bbdfc210416b9318df3f526b4f218f8a8df3a67020353c35da22dc154460b18f22a8009a747b3ef2975acae7', emits[0][2]['data_for_sha384']
|
58
|
+
assert_equal 'cdbb897e6f3a092161bdb51164eb2996b75b00555f568219628ff15cd2929865d217af5dff9c32ddc908b75a89baec96b3e9a0da120e919f5246de0f1bc54c58', emits[0][2]['data_for_sha512']
|
82
59
|
end
|
83
60
|
|
84
61
|
def test_emit_multi_keys
|
85
|
-
d1 = create_driver(
|
62
|
+
d1 = create_driver(%[
|
63
|
+
sha1_keys member_id, mail, telephone
|
64
|
+
ipaddr_mask_keys host, host2
|
65
|
+
ipv4_mask_subnet 16
|
66
|
+
remove_tag_prefix input.
|
67
|
+
add_tag_prefix anonymized.
|
68
|
+
], 'input.access')
|
86
69
|
d1.run do
|
87
70
|
d1.emit({
|
88
71
|
'host' => '10.102.3.80',
|
72
|
+
'host2' => '10.102.3.80',
|
89
73
|
'member_id' => '12345',
|
90
74
|
'mail' => 'example@example.com',
|
91
75
|
'telephone' => '00-0000-0000',
|
@@ -97,14 +81,20 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
97
81
|
p emits[0]
|
98
82
|
assert_equal 'anonymized.access', emits[0][0] # tag
|
99
83
|
assert_equal '10.102.0.0', emits[0][2]['host']
|
100
|
-
assert_equal '
|
101
|
-
assert_equal '
|
102
|
-
assert_equal '
|
84
|
+
assert_equal '10.102.0.0', emits[0][2]['host2']
|
85
|
+
assert_equal '774472f0dc892f0b3299cae8dadacd0a74ba59d7', emits[0][2]['member_id']
|
86
|
+
assert_equal 'd7b728209f5dd8df10cecbced30394c3c7fc2c82', emits[0][2]['mail']
|
87
|
+
assert_equal 'a67f73c395105a358a03a0f127bf64b5495e7841', emits[0][2]['telephone']
|
103
88
|
assert_equal 'signup', emits[0][2]['action']
|
104
89
|
end
|
105
90
|
|
106
91
|
def test_emit_nest_value
|
107
|
-
d1 = create_driver(
|
92
|
+
d1 = create_driver(%[
|
93
|
+
sha1_keys array,hash
|
94
|
+
ipaddr_mask_keys host
|
95
|
+
remove_tag_prefix input.
|
96
|
+
add_tag_prefix anonymized.
|
97
|
+
], 'input.access')
|
108
98
|
d1.run do
|
109
99
|
d1.emit({
|
110
100
|
'host' => '10.102.3.80',
|
@@ -117,12 +107,18 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
117
107
|
p emits[0]
|
118
108
|
assert_equal 'anonymized.access', emits[0][0] # tag
|
119
109
|
assert_equal '10.102.3.0', emits[0][2]['host']
|
120
|
-
assert_equal ["
|
121
|
-
assert_equal '
|
110
|
+
assert_equal ["c1628fc0d473cb21b15607c10bdcad19d1a42e24", "ea87abc249f9f2d430edb816514bffeffd3e698e"], emits[0][2]['array']
|
111
|
+
assert_equal '28fe85deb0d1d39ee14c49c62bc4773b0338247b', emits[0][2]['hash']
|
122
112
|
end
|
123
113
|
|
124
114
|
def test_emit_ipv6
|
125
|
-
d1 = create_driver(
|
115
|
+
d1 = create_driver(%[
|
116
|
+
ipaddr_mask_keys host
|
117
|
+
ipv4_mask_subnet 24
|
118
|
+
ipv6_mask_subnet 104
|
119
|
+
remove_tag_prefix input.
|
120
|
+
add_tag_prefix anonymized.
|
121
|
+
], 'input.access')
|
126
122
|
d1.run do
|
127
123
|
d1.emit({'host' => '10.102.3.80'})
|
128
124
|
d1.emit({'host' => '0:0:0:0:0:FFFF:129.144.52.38'})
|
@@ -136,4 +132,39 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
136
132
|
assert_equal '::ffff:129.0.0.0', emits[1][2]['host']
|
137
133
|
assert_equal '2001:db8:0:8d3:0:8a2e::', emits[2][2]['host']
|
138
134
|
end
|
135
|
+
|
136
|
+
def test_emit_tag_static
|
137
|
+
d1 = create_driver(%[
|
138
|
+
sha1_keys member_id
|
139
|
+
tag anonymized.message
|
140
|
+
], 'input.access')
|
141
|
+
d1.run do
|
142
|
+
d1.emit({
|
143
|
+
'member_id' => '12345',
|
144
|
+
})
|
145
|
+
end
|
146
|
+
emits = d1.emits
|
147
|
+
assert_equal 1, emits.length
|
148
|
+
p emits[0]
|
149
|
+
assert_equal 'anonymized.message', emits[0][0] # tag
|
150
|
+
assert_equal '774472f0dc892f0b3299cae8dadacd0a74ba59d7', emits[0][2]['member_id']
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_emit_tag_placeholder
|
154
|
+
d1 = create_driver(%[
|
155
|
+
sha1_keys member_id
|
156
|
+
tag anonymized.${tag}
|
157
|
+
remove_tag_prefix input.
|
158
|
+
], 'input.access')
|
159
|
+
d1.run do
|
160
|
+
d1.emit({
|
161
|
+
'member_id' => '12345',
|
162
|
+
})
|
163
|
+
end
|
164
|
+
emits = d1.emits
|
165
|
+
assert_equal 1, emits.length
|
166
|
+
p emits[0]
|
167
|
+
assert_equal 'anonymized.access', emits[0][0] # tag
|
168
|
+
assert_equal '774472f0dc892f0b3299cae8dadacd0a74ba59d7', emits[0][2]['member_id']
|
169
|
+
end
|
139
170
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-anonymizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-01-
|
12
|
+
date: 2014-01-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -100,9 +100,9 @@ rubyforge_project:
|
|
100
100
|
rubygems_version: 1.8.23
|
101
101
|
signing_key:
|
102
102
|
specification_version: 3
|
103
|
-
summary: Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512
|
104
|
-
algorithms. This data masking plugin protects privacy data such as
|
105
|
-
number, IPv4/IPv6 address and so on.
|
103
|
+
summary: Fluentd filter output plugin to anonymize records with HMAC of MD5/SHA1/SHA256/SHA384/SHA512
|
104
|
+
algorithms. This data masking plugin protects privacy data such as UserID, Email,
|
105
|
+
Phone number, IPv4/IPv6 address and so on.
|
106
106
|
test_files:
|
107
107
|
- test/helper.rb
|
108
108
|
- test/plugin/test_out_anonymizer.rb
|