fluent-plugin-anonymizer 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +1 -0
- data/README.md +21 -5
- data/fluent-plugin-anonymizer.gemspec +2 -2
- data/lib/fluent/plugin/out_anonymizer.rb +24 -11
- data/test/plugin/test_out_anonymizer.rb +67 -36
- metadata +5 -5
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -2,10 +2,12 @@
|
|
2
2
|
|
3
3
|
## Overview
|
4
4
|
|
5
|
-
Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as
|
5
|
+
Fluentd filter output plugin to anonymize records with [OpenSSL::HMAC](http://docs.ruby-lang.org/ja/1.9.3/class/OpenSSL=3a=3aHMAC.html) of MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as UserID, Email, Phone number, IPv4/IPv6 address and so on.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
9
|
+
install with gem or fluent-gem command as:
|
10
|
+
|
9
11
|
`````
|
10
12
|
### native gem
|
11
13
|
gem install fluent-plugin-anonymizer
|
@@ -28,10 +30,19 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
|
|
28
30
|
|
29
31
|
<match test.message>
|
30
32
|
type anonymizer
|
33
|
+
|
34
|
+
# Specify hashing keys with comma
|
31
35
|
sha1_keys user_id, member_id, mail
|
36
|
+
|
37
|
+
# Set hash salt with any strings for more security
|
38
|
+
hash_salt mysaltstring
|
39
|
+
|
40
|
+
# Specify rounding address keys with comma and subnet mask
|
32
41
|
ipaddr_mask_keys host
|
33
42
|
ipv4_mask_subnet 24
|
34
43
|
ipv6_mask_subnet 104
|
44
|
+
|
45
|
+
# Set tag rename pattern
|
35
46
|
remove_tag_prefix test.
|
36
47
|
add_tag_prefix anonymized.
|
37
48
|
</match>
|
@@ -48,8 +59,8 @@ $ echo '{"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"}
|
|
48
59
|
$ echo '{"host":"2001:db8:0:8d3:0:8a2e:70:7344","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
|
49
60
|
|
50
61
|
$ tail -f /var/log/td-agent/td-agent.log
|
51
|
-
2014-01-06 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"
|
52
|
-
2014-01-06 18:30:22 +0900 anonymized.message: {"host":"2001:db8:0:8d3:0:8a2e::","member_id":"
|
62
|
+
2014-01-06 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"61f6c1b5f19e0a7f73dd52a23534085bf01f2c67","mail":"eeb890d74b8c1c4cd1e35a3ea62166e0b770f4f4"}
|
63
|
+
2014-01-06 18:30:22 +0900 anonymized.message: {"host":"2001:db8:0:8d3:0:8a2e::","member_id":"61f6c1b5f19e0a7f73dd52a23534085bf01f2c67","mail":"eeb890d74b8c1c4cd1e35a3ea62166e0b770f4f4"}
|
53
64
|
`````
|
54
65
|
|
55
66
|
## Parameters
|
@@ -88,13 +99,18 @@ Add original tag name into filtered record using SetTagKeyMixin.
|
|
88
99
|
|
89
100
|
At least one of these options is required for editing the tag name using HandleTagNameMixin.
|
90
101
|
|
102
|
+
* tag
|
103
|
+
|
104
|
+
When this option is used [like 'tag anonymized.${tag}' with tag placeholder](https://github.com/y-ken/fluent-plugin-anonymizer/blob/master/test/plugin/test_out_anonymizer.rb#L153), the tag is rewritten after the following options have been applied: remove_tag_prefix, remove_tag_suffix, add_tag_prefix and add_tag_suffix.
|
105
|
+
|
91
106
|
## Notes
|
92
107
|
|
93
|
-
* hashing nested value behavior is compatible with [LogStash::Filters::Anonymize](https://github.com/logstash/logstash/blob/master/lib/logstash/filters/anonymize.rb) does. For further details, please check it out the test code at [test_emit_nest_value](https://github.com/y-ken/fluent-plugin-anonymizer/blob/master/test/plugin/test_out_anonymizer.rb#
|
108
|
+
* The hashing behavior for nested values is compatible with that of [LogStash::Filters::Anonymize](https://github.com/logstash/logstash/blob/master/lib/logstash/filters/anonymize.rb). For further details, please check out the test code at [test_emit_nest_value](https://github.com/y-ken/fluent-plugin-anonymizer/blob/master/test/plugin/test_out_anonymizer.rb#L91).
|
94
109
|
|
95
110
|
## Blog Articles
|
96
111
|
|
97
|
-
*
|
112
|
+
* 個人情報を難読化するfluent-plugin-anonymizerをリリースしました #fluentd - Y-Ken Studio
|
113
|
+
http://y-ken.hatenablog.com/entry/fluent-plugin-anonymizer-has-released
|
98
114
|
|
99
115
|
## TODO
|
100
116
|
|
@@ -4,10 +4,10 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-anonymizer"
|
7
|
-
spec.version = "0.
|
7
|
+
spec.version = "0.2.0"
|
8
8
|
spec.authors = ["Kentaro Yoshida"]
|
9
9
|
spec.email = ["y.ken.studio@gmail.com"]
|
10
|
-
spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as
|
10
|
+
spec.summary = %q{Fluentd filter output plugin to anonymize records with HMAC of MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as UserID, Email, Phone number, IPv4/IPv6 address and so on.}
|
11
11
|
spec.homepage = "https://github.com/y-ken/fluent-plugin-anonymizer"
|
12
12
|
spec.license = "Apache License, Version 2.0"
|
13
13
|
|
@@ -2,6 +2,7 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
2
2
|
Fluent::Plugin.register_output('anonymizer', self)
|
3
3
|
|
4
4
|
HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512 ipaddr_mask)
|
5
|
+
config_param :tag, :string, :default => nil
|
5
6
|
config_param :hash_salt, :string, :default => ''
|
6
7
|
config_param :ipv4_mask_subnet, :integer, :default => 24
|
7
8
|
config_param :ipv6_mask_subnet, :integer, :default => 104
|
@@ -12,15 +13,15 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
12
13
|
config_set_default :include_tag_key, false
|
13
14
|
|
14
15
|
DIGEST = {
|
15
|
-
"md5" => Proc.new { Digest
|
16
|
-
"sha1" => Proc.new { Digest
|
17
|
-
"sha256" => Proc.new { Digest
|
18
|
-
"sha384" => Proc.new { Digest
|
19
|
-
"sha512" => Proc.new { Digest
|
16
|
+
"md5" => Proc.new { OpenSSL::Digest.new('md5') },
|
17
|
+
"sha1" => Proc.new { OpenSSL::Digest.new('sha1') },
|
18
|
+
"sha256" => Proc.new { OpenSSL::Digest.new('sha256') },
|
19
|
+
"sha384" => Proc.new { OpenSSL::Digest.new('sha384') },
|
20
|
+
"sha512" => Proc.new { OpenSSL::Digest.new('sha512') }
|
20
21
|
}
|
21
22
|
|
22
23
|
def initialize
|
23
|
-
require '
|
24
|
+
require 'openssl'
|
24
25
|
require 'ipaddr'
|
25
26
|
super
|
26
27
|
end
|
@@ -42,7 +43,7 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
42
43
|
end
|
43
44
|
$log.info "anonymizer: adding anonymize rules for each field. #{@hash_keys}"
|
44
45
|
|
45
|
-
if ( !@remove_tag_prefix && !@remove_tag_suffix && !@add_tag_prefix && !@add_tag_suffix )
|
46
|
+
if ( !@tag && !@remove_tag_prefix && !@remove_tag_suffix && !@add_tag_prefix && !@add_tag_suffix )
|
46
47
|
raise Fluent::ConfigError, "anonymizer: missing remove_tag_prefix, remove_tag_suffix, add_tag_prefix or add_tag_suffix."
|
47
48
|
end
|
48
49
|
end
|
@@ -53,13 +54,25 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
53
54
|
next unless record.include?(hash_key)
|
54
55
|
record[hash_key] = filter_anonymize_record(record[hash_key], hash_algorithm)
|
55
56
|
end
|
56
|
-
|
57
|
-
filter_record(
|
58
|
-
|
57
|
+
emit_tag = tag.dup
|
58
|
+
filter_record(emit_tag, time, record)
|
59
|
+
emit_tag = rewrite_tag(@tag, emit_tag) if @tag
|
60
|
+
Fluent::Engine.emit(emit_tag, time, record)
|
59
61
|
end
|
60
62
|
chain.next
|
61
63
|
end
|
62
64
|
|
65
|
+
def rewrite_tag(rewritetag, tag)
|
66
|
+
placeholder = {
|
67
|
+
'${tag}' => tag,
|
68
|
+
'__TAG__' => tag
|
69
|
+
}
|
70
|
+
return rewritetag.gsub(/(\${[a-z_]+(\[[0-9]+\])?}|__[A-Z_]+__)/) do
|
71
|
+
$log.warn "anonymizer: unknown placeholder found. :placeholder=>#{$1} :tag=>#{tag} :rewritetag=>#{rewritetag}" unless placeholder.include?($1)
|
72
|
+
placeholder[$1]
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
63
76
|
def filter_anonymize_record(data, hash_algorithm)
|
64
77
|
begin
|
65
78
|
if data.is_a?(Array)
|
@@ -77,7 +90,7 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
77
90
|
def anonymize(message, algorithm, salt)
|
78
91
|
case algorithm
|
79
92
|
when 'md5','sha1','sha256','sha384','sha512'
|
80
|
-
DIGEST[algorithm].call
|
93
|
+
OpenSSL::HMAC.hexdigest(DIGEST[algorithm].call, salt, message.to_s)
|
81
94
|
when 'ipaddr_mask'
|
82
95
|
address = IPAddr.new(message)
|
83
96
|
subnet = address.ipv4? ? @ipv4_mask_subnet : @ipv6_mask_subnet
|
@@ -18,29 +18,6 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
18
18
|
add_tag_prefix anonymized.
|
19
19
|
]
|
20
20
|
|
21
|
-
CONFIG_MULTI_KEYS = %[
|
22
|
-
sha1_keys member_id, mail, telephone
|
23
|
-
ipaddr_mask_keys host
|
24
|
-
ipv4_mask_subnet 16
|
25
|
-
remove_tag_prefix input.
|
26
|
-
add_tag_prefix anonymized.
|
27
|
-
]
|
28
|
-
|
29
|
-
CONFIG_NEST_VALUE = %[
|
30
|
-
sha1_keys array,hash
|
31
|
-
ipaddr_mask_keys host
|
32
|
-
remove_tag_prefix input.
|
33
|
-
add_tag_prefix anonymized.
|
34
|
-
]
|
35
|
-
|
36
|
-
CONFIG_IPV6 = %[
|
37
|
-
ipaddr_mask_keys host
|
38
|
-
ipv4_mask_subnet 24
|
39
|
-
ipv6_mask_subnet 104
|
40
|
-
remove_tag_prefix input.
|
41
|
-
add_tag_prefix anonymized.
|
42
|
-
]
|
43
|
-
|
44
21
|
def create_driver(conf=CONFIG,tag='test')
|
45
22
|
Fluent::Test::OutputTestDriver.new(Fluent::AnonymizerOutput, tag).configure(conf)
|
46
23
|
end
|
@@ -74,18 +51,25 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
74
51
|
p emits[0]
|
75
52
|
assert_equal 'anonymized.access', emits[0][0] # tag
|
76
53
|
assert_equal '10.102.3.0', emits[0][2]['host']
|
77
|
-
assert_equal '
|
78
|
-
assert_equal '
|
79
|
-
assert_equal '
|
80
|
-
assert_equal '
|
81
|
-
assert_equal '
|
54
|
+
assert_equal 'e738cbde82a514dc60582cd467c240ed', emits[0][2]['data_for_md5']
|
55
|
+
assert_equal '69cf099459c06b852ede96d39b710027727d13c6', emits[0][2]['data_for_sha1']
|
56
|
+
assert_equal '804d83b8c6a3e01498d40677652b084333196d8e548ee5a8710fbd0e1e115527', emits[0][2]['data_for_sha256']
|
57
|
+
assert_equal '6c90c389bbdfc210416b9318df3f526b4f218f8a8df3a67020353c35da22dc154460b18f22a8009a747b3ef2975acae7', emits[0][2]['data_for_sha384']
|
58
|
+
assert_equal 'cdbb897e6f3a092161bdb51164eb2996b75b00555f568219628ff15cd2929865d217af5dff9c32ddc908b75a89baec96b3e9a0da120e919f5246de0f1bc54c58', emits[0][2]['data_for_sha512']
|
82
59
|
end
|
83
60
|
|
84
61
|
def test_emit_multi_keys
|
85
|
-
d1 = create_driver(
|
62
|
+
d1 = create_driver(%[
|
63
|
+
sha1_keys member_id, mail, telephone
|
64
|
+
ipaddr_mask_keys host, host2
|
65
|
+
ipv4_mask_subnet 16
|
66
|
+
remove_tag_prefix input.
|
67
|
+
add_tag_prefix anonymized.
|
68
|
+
], 'input.access')
|
86
69
|
d1.run do
|
87
70
|
d1.emit({
|
88
71
|
'host' => '10.102.3.80',
|
72
|
+
'host2' => '10.102.3.80',
|
89
73
|
'member_id' => '12345',
|
90
74
|
'mail' => 'example@example.com',
|
91
75
|
'telephone' => '00-0000-0000',
|
@@ -97,14 +81,20 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
97
81
|
p emits[0]
|
98
82
|
assert_equal 'anonymized.access', emits[0][0] # tag
|
99
83
|
assert_equal '10.102.0.0', emits[0][2]['host']
|
100
|
-
assert_equal '
|
101
|
-
assert_equal '
|
102
|
-
assert_equal '
|
84
|
+
assert_equal '10.102.0.0', emits[0][2]['host2']
|
85
|
+
assert_equal '774472f0dc892f0b3299cae8dadacd0a74ba59d7', emits[0][2]['member_id']
|
86
|
+
assert_equal 'd7b728209f5dd8df10cecbced30394c3c7fc2c82', emits[0][2]['mail']
|
87
|
+
assert_equal 'a67f73c395105a358a03a0f127bf64b5495e7841', emits[0][2]['telephone']
|
103
88
|
assert_equal 'signup', emits[0][2]['action']
|
104
89
|
end
|
105
90
|
|
106
91
|
def test_emit_nest_value
|
107
|
-
d1 = create_driver(
|
92
|
+
d1 = create_driver(%[
|
93
|
+
sha1_keys array,hash
|
94
|
+
ipaddr_mask_keys host
|
95
|
+
remove_tag_prefix input.
|
96
|
+
add_tag_prefix anonymized.
|
97
|
+
], 'input.access')
|
108
98
|
d1.run do
|
109
99
|
d1.emit({
|
110
100
|
'host' => '10.102.3.80',
|
@@ -117,12 +107,18 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
117
107
|
p emits[0]
|
118
108
|
assert_equal 'anonymized.access', emits[0][0] # tag
|
119
109
|
assert_equal '10.102.3.0', emits[0][2]['host']
|
120
|
-
assert_equal ["
|
121
|
-
assert_equal '
|
110
|
+
assert_equal ["c1628fc0d473cb21b15607c10bdcad19d1a42e24", "ea87abc249f9f2d430edb816514bffeffd3e698e"], emits[0][2]['array']
|
111
|
+
assert_equal '28fe85deb0d1d39ee14c49c62bc4773b0338247b', emits[0][2]['hash']
|
122
112
|
end
|
123
113
|
|
124
114
|
def test_emit_ipv6
|
125
|
-
d1 = create_driver(
|
115
|
+
d1 = create_driver(%[
|
116
|
+
ipaddr_mask_keys host
|
117
|
+
ipv4_mask_subnet 24
|
118
|
+
ipv6_mask_subnet 104
|
119
|
+
remove_tag_prefix input.
|
120
|
+
add_tag_prefix anonymized.
|
121
|
+
], 'input.access')
|
126
122
|
d1.run do
|
127
123
|
d1.emit({'host' => '10.102.3.80'})
|
128
124
|
d1.emit({'host' => '0:0:0:0:0:FFFF:129.144.52.38'})
|
@@ -136,4 +132,39 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
136
132
|
assert_equal '::ffff:129.0.0.0', emits[1][2]['host']
|
137
133
|
assert_equal '2001:db8:0:8d3:0:8a2e::', emits[2][2]['host']
|
138
134
|
end
|
135
|
+
|
136
|
+
def test_emit_tag_static
|
137
|
+
d1 = create_driver(%[
|
138
|
+
sha1_keys member_id
|
139
|
+
tag anonymized.message
|
140
|
+
], 'input.access')
|
141
|
+
d1.run do
|
142
|
+
d1.emit({
|
143
|
+
'member_id' => '12345',
|
144
|
+
})
|
145
|
+
end
|
146
|
+
emits = d1.emits
|
147
|
+
assert_equal 1, emits.length
|
148
|
+
p emits[0]
|
149
|
+
assert_equal 'anonymized.message', emits[0][0] # tag
|
150
|
+
assert_equal '774472f0dc892f0b3299cae8dadacd0a74ba59d7', emits[0][2]['member_id']
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_emit_tag_placeholder
|
154
|
+
d1 = create_driver(%[
|
155
|
+
sha1_keys member_id
|
156
|
+
tag anonymized.${tag}
|
157
|
+
remove_tag_prefix input.
|
158
|
+
], 'input.access')
|
159
|
+
d1.run do
|
160
|
+
d1.emit({
|
161
|
+
'member_id' => '12345',
|
162
|
+
})
|
163
|
+
end
|
164
|
+
emits = d1.emits
|
165
|
+
assert_equal 1, emits.length
|
166
|
+
p emits[0]
|
167
|
+
assert_equal 'anonymized.access', emits[0][0] # tag
|
168
|
+
assert_equal '774472f0dc892f0b3299cae8dadacd0a74ba59d7', emits[0][2]['member_id']
|
169
|
+
end
|
139
170
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-anonymizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-01-
|
12
|
+
date: 2014-01-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -100,9 +100,9 @@ rubyforge_project:
|
|
100
100
|
rubygems_version: 1.8.23
|
101
101
|
signing_key:
|
102
102
|
specification_version: 3
|
103
|
-
summary: Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512
|
104
|
-
algorithms. This data masking plugin protects privacy data such as
|
105
|
-
number, IPv4/IPv6 address and so on.
|
103
|
+
summary: Fluentd filter output plugin to anonymize records with HMAC of MD5/SHA1/SHA256/SHA384/SHA512
|
104
|
+
algorithms. This data masking plugin protects privacy data such as UserID, Email,
|
105
|
+
Phone number, IPv4/IPv6 address and so on.
|
106
106
|
test_files:
|
107
107
|
- test/helper.rb
|
108
108
|
- test/plugin/test_out_anonymizer.rb
|