fluent-plugin-anonymizer 0.0.3 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +9 -5
- data/fluent-plugin-anonymizer.gemspec +2 -2
- data/lib/fluent/plugin/out_anonymizer.rb +16 -7
- data/test/plugin/test_out_anonymizer.rb +27 -3
- metadata +3 -3
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
## Overview
|
4
4
|
|
5
|
-
Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number,
|
5
|
+
Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IPv4/IPv6 address and so on.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -18,7 +18,7 @@ gem install fluent-plugin-anonymizer
|
|
18
18
|
|
19
19
|
#### configuration
|
20
20
|
|
21
|
-
It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. For IP address, rounding number with 24bit netmask
|
21
|
+
This sample hashes the `user_id`, `member_id` and `mail` fields with sha1. IP addresses are auto-detected as IPv4 or IPv6 and rounded with a 24bit (IPv4) or 104bit (IPv6) netmask, using the `ipaddr_mask_keys`, `ipv4_mask_subnet` and `ipv6_mask_subnet` options.
|
22
22
|
|
23
23
|
`````
|
24
24
|
<source>
|
@@ -29,8 +29,9 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
|
|
29
29
|
<match test.message>
|
30
30
|
type anonymizer
|
31
31
|
sha1_keys user_id, member_id, mail
|
32
|
-
|
32
|
+
ipaddr_mask_keys host
|
33
33
|
ipv4_mask_subnet 24
|
34
|
+
ipv6_mask_subnet 104
|
34
35
|
remove_tag_prefix test.
|
35
36
|
add_tag_prefix anonymized.
|
36
37
|
</match>
|
@@ -44,9 +45,11 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
|
|
44
45
|
|
45
46
|
`````
|
46
47
|
$ echo '{"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
|
48
|
+
$ echo '{"host":"2001:db8:0:8d3:0:8a2e:70:7344","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
|
47
49
|
|
48
50
|
$ tail -f /var/log/td-agent/td-agent.log
|
49
|
-
|
51
|
+
2014-01-06 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
|
52
|
+
2014-01-06 18:30:22 +0900 anonymized.message: {"host":"2001:db8:0:8d3:0:8a2e::","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
|
50
53
|
`````
|
51
54
|
|
52
55
|
## Parameters
|
@@ -61,8 +64,9 @@ This salt affects for `md5_keys` `sha1_keys` `sha256_keys` `sha384_keys` `sha512
|
|
61
64
|
It is recommended to set a hash salt to prevent rainbow table attacks.
|
62
65
|
|
63
66
|
|
64
|
-
* `
|
67
|
+
* `ipaddr_mask_keys`
|
65
68
|
* `ipv4_mask_subnet` (default: 24)
|
69
|
+
* `ipv6_mask_subnet` (default: 104)
|
66
70
|
|
67
71
|
Rounds the number for one or more of the given keys. This makes aggregate calculations easier.
|
68
72
|
|
@@ -4,10 +4,10 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-anonymizer"
|
7
|
-
spec.version = "0.0.3"
|
7
|
+
spec.version = "0.1.0"
|
8
8
|
spec.authors = ["Kentaro Yoshida"]
|
9
9
|
spec.email = ["y.ken.studio@gmail.com"]
|
10
|
-
spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number,
|
10
|
+
spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IPv4/IPv6 address and so on.}
|
11
11
|
spec.homepage = "https://github.com/y-ken/fluent-plugin-anonymizer"
|
12
12
|
spec.license = "Apache License, Version 2.0"
|
13
13
|
|
@@ -1,9 +1,10 @@
|
|
1
1
|
class Fluent::AnonymizerOutput < Fluent::Output
|
2
2
|
Fluent::Plugin.register_output('anonymizer', self)
|
3
3
|
|
4
|
-
HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512
|
4
|
+
HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512 ipaddr_mask)
|
5
5
|
config_param :hash_salt, :string, :default => ''
|
6
6
|
config_param :ipv4_mask_subnet, :integer, :default => 24
|
7
|
+
config_param :ipv6_mask_subnet, :integer, :default => 104
|
7
8
|
|
8
9
|
include Fluent::HandleTagNameMixin
|
9
10
|
|
@@ -39,6 +40,7 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
39
40
|
if @hash_keys.count < 1
|
40
41
|
raise Fluent::ConfigError, "anonymizer: missing hash keys setting."
|
41
42
|
end
|
43
|
+
$log.info "anonymizer: adding anonymize rules for each field. #{@hash_keys}"
|
42
44
|
|
43
45
|
if ( !@remove_tag_prefix && !@remove_tag_suffix && !@add_tag_prefix && !@add_tag_suffix )
|
44
46
|
raise Fluent::ConfigError, "anonymizer: missing remove_tag_prefix, remove_tag_suffix, add_tag_prefix or add_tag_suffix."
|
@@ -59,10 +61,15 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
59
61
|
end
|
60
62
|
|
61
63
|
def filter_anonymize_record(data, hash_algorithm)
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
64
|
+
begin
|
65
|
+
if data.is_a?(Array)
|
66
|
+
data = data.collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
|
67
|
+
else
|
68
|
+
data = anonymize(data, hash_algorithm, @hash_salt)
|
69
|
+
end
|
70
|
+
rescue StandardError => e
|
71
|
+
$log.error "anonymizer: failed to anonymize record. :message=>#{e.message} :data=>#{data}"
|
72
|
+
$log.error e.backtrace.join("\n")
|
66
73
|
end
|
67
74
|
data
|
68
75
|
end
|
@@ -71,8 +78,10 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
71
78
|
case algorithm
|
72
79
|
when 'md5','sha1','sha256','sha384','sha512'
|
73
80
|
DIGEST[algorithm].call.hexdigest(salt + message.to_s)
|
74
|
-
when '
|
75
|
-
IPAddr.new(message)
|
81
|
+
when 'ipaddr_mask'
|
82
|
+
address = IPAddr.new(message)
|
83
|
+
subnet = address.ipv4? ? @ipv4_mask_subnet : @ipv6_mask_subnet
|
84
|
+
address.mask(subnet).to_s
|
76
85
|
else
|
77
86
|
$log.warn "anonymizer: unknown algorithm #{algorithm} has called."
|
78
87
|
end
|
@@ -12,7 +12,7 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
12
12
|
sha384_keys data_for_sha384
|
13
13
|
sha512_keys data_for_sha512
|
14
14
|
hash_salt test_salt_string
|
15
|
-
|
15
|
+
ipaddr_mask_keys host
|
16
16
|
ipv4_mask_subnet 24
|
17
17
|
remove_tag_prefix input.
|
18
18
|
add_tag_prefix anonymized.
|
@@ -20,7 +20,7 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
20
20
|
|
21
21
|
CONFIG_MULTI_KEYS = %[
|
22
22
|
sha1_keys member_id, mail, telephone
|
23
|
-
|
23
|
+
ipaddr_mask_keys host
|
24
24
|
ipv4_mask_subnet 16
|
25
25
|
remove_tag_prefix input.
|
26
26
|
add_tag_prefix anonymized.
|
@@ -28,7 +28,15 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
28
28
|
|
29
29
|
CONFIG_NEST_VALUE = %[
|
30
30
|
sha1_keys array,hash
|
31
|
-
|
31
|
+
ipaddr_mask_keys host
|
32
|
+
remove_tag_prefix input.
|
33
|
+
add_tag_prefix anonymized.
|
34
|
+
]
|
35
|
+
|
36
|
+
CONFIG_IPV6 = %[
|
37
|
+
ipaddr_mask_keys host
|
38
|
+
ipv4_mask_subnet 24
|
39
|
+
ipv6_mask_subnet 104
|
32
40
|
remove_tag_prefix input.
|
33
41
|
add_tag_prefix anonymized.
|
34
42
|
]
|
@@ -112,4 +120,20 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
112
120
|
assert_equal ["e3cbba8883fe746c6e35783c9404b4bc0c7ee9eb", "a4ac914c09d7c097fe1f4f96b897e625b6922069"], emits[0][2]['array']
|
113
121
|
assert_equal '1a1903d78aed9403649d61cb21ba6b489249761b', emits[0][2]['hash']
|
114
122
|
end
|
123
|
+
|
124
|
+
def test_emit_ipv6
|
125
|
+
d1 = create_driver(CONFIG_IPV6, 'input.access')
|
126
|
+
d1.run do
|
127
|
+
d1.emit({'host' => '10.102.3.80'})
|
128
|
+
d1.emit({'host' => '0:0:0:0:0:FFFF:129.144.52.38'})
|
129
|
+
d1.emit({'host' => '2001:db8:0:8d3:0:8a2e:70:7344'})
|
130
|
+
end
|
131
|
+
emits = d1.emits
|
132
|
+
assert_equal 3, emits.length
|
133
|
+
p emits
|
134
|
+
assert_equal 'anonymized.access', emits[0][0] # tag
|
135
|
+
assert_equal '10.102.3.0', emits[0][2]['host']
|
136
|
+
assert_equal '::ffff:129.0.0.0', emits[1][2]['host']
|
137
|
+
assert_equal '2001:db8:0:8d3:0:8a2e::', emits[2][2]['host']
|
138
|
+
end
|
115
139
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-anonymizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2014-01-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -102,7 +102,7 @@ signing_key:
|
|
102
102
|
specification_version: 3
|
103
103
|
summary: Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512
|
104
104
|
algorithms. This data masking plugin protects privacy data such as ID, email, phone
|
105
|
-
number,
|
105
|
+
number, IPv4/IPv6 address and so on.
|
106
106
|
test_files:
|
107
107
|
- test/helper.rb
|
108
108
|
- test/plugin/test_out_anonymizer.rb
|