fluent-plugin-anonymizer 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +9 -5
- data/fluent-plugin-anonymizer.gemspec +2 -2
- data/lib/fluent/plugin/out_anonymizer.rb +16 -7
- data/test/plugin/test_out_anonymizer.rb +27 -3
- metadata +3 -3
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
## Overview
|
4
4
|
|
5
|
-
Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number,
|
5
|
+
Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IPv4/IPv6 address and so on.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -18,7 +18,7 @@ gem install fluent-plugin-anonymizer
|
|
18
18
|
|
19
19
|
#### configuration
|
20
20
|
|
21
|
-
It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. For IP address, rounding number with 24bit netmask
|
21
|
+
It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. For IP address, auto-detecting IPv4/IPv6 and rounding number with 24bit(IPv4) or 104bit(IPv6) netmask using `ipaddr_mask_keys` and `ipv4_mask_subnet`, `ipv6_mask_subnet` option.
|
22
22
|
|
23
23
|
`````
|
24
24
|
<source>
|
@@ -29,8 +29,9 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
|
|
29
29
|
<match test.message>
|
30
30
|
type anonymizer
|
31
31
|
sha1_keys user_id, member_id, mail
|
32
|
-
|
32
|
+
ipaddr_mask_keys host
|
33
33
|
ipv4_mask_subnet 24
|
34
|
+
ipv6_mask_subnet 104
|
34
35
|
remove_tag_prefix test.
|
35
36
|
add_tag_prefix anonymized.
|
36
37
|
</match>
|
@@ -44,9 +45,11 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
|
|
44
45
|
|
45
46
|
`````
|
46
47
|
$ echo '{"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
|
48
|
+
$ echo '{"host":"2001:db8:0:8d3:0:8a2e:70:7344","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
|
47
49
|
|
48
50
|
$ tail -f /var/log/td-agent/td-agent.log
|
49
|
-
|
51
|
+
2014-01-06 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
|
52
|
+
2014-01-06 18:30:22 +0900 anonymized.message: {"host":"2001:db8:0:8d3:0:8a2e::","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
|
50
53
|
`````
|
51
54
|
|
52
55
|
## Parameters
|
@@ -61,8 +64,9 @@ This salt affects for `md5_keys` `sha1_keys` `sha256_keys` `sha384_keys` `sha512
|
|
61
64
|
It is recommend to set a hash salt to prevent rainbow table attacks.
|
62
65
|
|
63
66
|
|
64
|
-
* `
|
67
|
+
* `ipaddr_mask_keys`
|
65
68
|
* `ipv4_mask_subnet` (default: 24)
|
69
|
+
* `ipv6_mask_subnet` (default: 104)
|
66
70
|
|
67
71
|
Round number for following one or more keys. It makes easy to aggregate calculation.
|
68
72
|
|
@@ -4,10 +4,10 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-anonymizer"
|
7
|
-
spec.version = "0.0.3"
|
7
|
+
spec.version = "0.1.0"
|
8
8
|
spec.authors = ["Kentaro Yoshida"]
|
9
9
|
spec.email = ["y.ken.studio@gmail.com"]
|
10
|
-
spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number,
|
10
|
+
spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IPv4/IPv6 address and so on.}
|
11
11
|
spec.homepage = "https://github.com/y-ken/fluent-plugin-anonymizer"
|
12
12
|
spec.license = "Apache License, Version 2.0"
|
13
13
|
|
@@ -1,9 +1,10 @@
|
|
1
1
|
class Fluent::AnonymizerOutput < Fluent::Output
|
2
2
|
Fluent::Plugin.register_output('anonymizer', self)
|
3
3
|
|
4
|
-
HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512
|
4
|
+
HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512 ipaddr_mask)
|
5
5
|
config_param :hash_salt, :string, :default => ''
|
6
6
|
config_param :ipv4_mask_subnet, :integer, :default => 24
|
7
|
+
config_param :ipv6_mask_subnet, :integer, :default => 104
|
7
8
|
|
8
9
|
include Fluent::HandleTagNameMixin
|
9
10
|
|
@@ -39,6 +40,7 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
39
40
|
if @hash_keys.count < 1
|
40
41
|
raise Fluent::ConfigError, "anonymizer: missing hash keys setting."
|
41
42
|
end
|
43
|
+
$log.info "anonymizer: adding anonymize rules for each field. #{@hash_keys}"
|
42
44
|
|
43
45
|
if ( !@remove_tag_prefix && !@remove_tag_suffix && !@add_tag_prefix && !@add_tag_suffix )
|
44
46
|
raise Fluent::ConfigError, "anonymizer: missing remove_tag_prefix, remove_tag_suffix, add_tag_prefix or add_tag_suffix."
|
@@ -59,10 +61,15 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
59
61
|
end
|
60
62
|
|
61
63
|
def filter_anonymize_record(data, hash_algorithm)
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
64
|
+
begin
|
65
|
+
if data.is_a?(Array)
|
66
|
+
data = data.collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
|
67
|
+
else
|
68
|
+
data = anonymize(data, hash_algorithm, @hash_salt)
|
69
|
+
end
|
70
|
+
rescue StandardError => e
|
71
|
+
$log.error "anonymizer: failed to anonymize record. :message=>#{e.message} :data=>#{data}"
|
72
|
+
$log.error e.backtrace.join("\n")
|
66
73
|
end
|
67
74
|
data
|
68
75
|
end
|
@@ -71,8 +78,10 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
71
78
|
case algorithm
|
72
79
|
when 'md5','sha1','sha256','sha384','sha512'
|
73
80
|
DIGEST[algorithm].call.hexdigest(salt + message.to_s)
|
74
|
-
when '
|
75
|
-
IPAddr.new(message)
|
81
|
+
when 'ipaddr_mask'
|
82
|
+
address = IPAddr.new(message)
|
83
|
+
subnet = address.ipv4? ? @ipv4_mask_subnet : @ipv6_mask_subnet
|
84
|
+
address.mask(subnet).to_s
|
76
85
|
else
|
77
86
|
$log.warn "anonymizer: unknown algorithm #{algorithm} has called."
|
78
87
|
end
|
@@ -12,7 +12,7 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
12
12
|
sha384_keys data_for_sha384
|
13
13
|
sha512_keys data_for_sha512
|
14
14
|
hash_salt test_salt_string
|
15
|
-
|
15
|
+
ipaddr_mask_keys host
|
16
16
|
ipv4_mask_subnet 24
|
17
17
|
remove_tag_prefix input.
|
18
18
|
add_tag_prefix anonymized.
|
@@ -20,7 +20,7 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
20
20
|
|
21
21
|
CONFIG_MULTI_KEYS = %[
|
22
22
|
sha1_keys member_id, mail, telephone
|
23
|
-
|
23
|
+
ipaddr_mask_keys host
|
24
24
|
ipv4_mask_subnet 16
|
25
25
|
remove_tag_prefix input.
|
26
26
|
add_tag_prefix anonymized.
|
@@ -28,7 +28,15 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
28
28
|
|
29
29
|
CONFIG_NEST_VALUE = %[
|
30
30
|
sha1_keys array,hash
|
31
|
-
|
31
|
+
ipaddr_mask_keys host
|
32
|
+
remove_tag_prefix input.
|
33
|
+
add_tag_prefix anonymized.
|
34
|
+
]
|
35
|
+
|
36
|
+
CONFIG_IPV6 = %[
|
37
|
+
ipaddr_mask_keys host
|
38
|
+
ipv4_mask_subnet 24
|
39
|
+
ipv6_mask_subnet 104
|
32
40
|
remove_tag_prefix input.
|
33
41
|
add_tag_prefix anonymized.
|
34
42
|
]
|
@@ -112,4 +120,20 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
112
120
|
assert_equal ["e3cbba8883fe746c6e35783c9404b4bc0c7ee9eb", "a4ac914c09d7c097fe1f4f96b897e625b6922069"], emits[0][2]['array']
|
113
121
|
assert_equal '1a1903d78aed9403649d61cb21ba6b489249761b', emits[0][2]['hash']
|
114
122
|
end
|
123
|
+
|
124
|
+
def test_emit_ipv6
|
125
|
+
d1 = create_driver(CONFIG_IPV6, 'input.access')
|
126
|
+
d1.run do
|
127
|
+
d1.emit({'host' => '10.102.3.80'})
|
128
|
+
d1.emit({'host' => '0:0:0:0:0:FFFF:129.144.52.38'})
|
129
|
+
d1.emit({'host' => '2001:db8:0:8d3:0:8a2e:70:7344'})
|
130
|
+
end
|
131
|
+
emits = d1.emits
|
132
|
+
assert_equal 3, emits.length
|
133
|
+
p emits
|
134
|
+
assert_equal 'anonymized.access', emits[0][0] # tag
|
135
|
+
assert_equal '10.102.3.0', emits[0][2]['host']
|
136
|
+
assert_equal '::ffff:129.0.0.0', emits[1][2]['host']
|
137
|
+
assert_equal '2001:db8:0:8d3:0:8a2e::', emits[2][2]['host']
|
138
|
+
end
|
115
139
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-anonymizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2014-01-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -102,7 +102,7 @@ signing_key:
|
|
102
102
|
specification_version: 3
|
103
103
|
summary: Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512
|
104
104
|
algorithms. This data masking plugin protects privacy data such as ID, email, phone
|
105
|
-
number,
|
105
|
+
number, IPv4/IPv6 address and so on.
|
106
106
|
test_files:
|
107
107
|
- test/helper.rb
|
108
108
|
- test/plugin/test_out_anonymizer.rb
|