fluent-plugin-anonymizer 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +8 -4
- data/fluent-plugin-anonymizer.gemspec +2 -2
- data/lib/fluent/plugin/out_anonymizer.rb +10 -10
- data/test/plugin/test_out_anonymizer.rb +25 -0
- metadata +5 -4
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
## Overview
|
4
4
|
|
5
|
-
Fluentd filter output plugin to anonymize records. This data masking plugin protects privacy data such as
|
5
|
+
Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IP address and so on.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -27,7 +27,7 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
|
|
27
27
|
</source>
|
28
28
|
|
29
29
|
<match test.message>
|
30
|
-
type
|
30
|
+
type anonymizer
|
31
31
|
sha1_keys user_id, member_id, mail
|
32
32
|
ipv4_mask_keys host
|
33
33
|
ipv4_mask_subnet 24
|
@@ -46,10 +46,10 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
|
|
46
46
|
$ echo '{"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
|
47
47
|
|
48
48
|
$ tail -f /var/log/td-agent/td-agent.log
|
49
|
-
2013-11-19 18:30:21 +0900 anonymized.message: {"host":"10.102.
|
49
|
+
2013-11-19 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
|
50
50
|
`````
|
51
51
|
|
52
|
-
|
52
|
+
## Parameters
|
53
53
|
|
54
54
|
* `md5_keys` `sha1_keys` `sha256_keys` `sha384_keys` `sha512_keys`
|
55
55
|
|
@@ -83,6 +83,10 @@ Add original tag name into filtered record using SetTagKeyMixin function.
|
|
83
83
|
|
84
84
|
Edit tag format using HandleTagNameMixin function.
|
85
85
|
|
86
|
+
## Notes
|
87
|
+
|
88
|
+
* The behavior when hashing nested values is compatible with that of [LogStash::Filters::Anonymize](https://github.com/logstash/logstash/blob/master/lib/logstash/filters/anonymize.rb). For further details, please check out the test code at [test_emit_nest_value](https://github.com/y-ken/fluent-plugin-anonymizer/blob/master/test/plugin/test_out_anonymizer.rb#L98).
|
89
|
+
|
86
90
|
## Blog Articles
|
87
91
|
|
88
92
|
* http://y-ken.hatenablog.com/entry/fluent-plugin-anonymizer-has-released
|
@@ -4,10 +4,10 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-anonymizer"
|
7
|
-
spec.version = "0.0.
|
7
|
+
spec.version = "0.0.2"
|
8
8
|
spec.authors = ["Kentaro Yoshida"]
|
9
9
|
spec.email = ["y.ken.studio@gmail.com"]
|
10
|
-
spec.summary = %q{Fluentd filter output plugin to anonymize records. This data masking plugin protects privacy data such as
|
10
|
+
spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IP address and so on.}
|
11
11
|
spec.homepage = "https://github.com/y-ken/fluent-plugin-anonymizer"
|
12
12
|
spec.license = "Apache License, Version 2.0"
|
13
13
|
|
@@ -47,23 +47,23 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
47
47
|
|
48
48
|
def emit(tag, es, chain)
|
49
49
|
es.each do |time, record|
|
50
|
-
|
50
|
+
@hash_keys.each do |hash_key, hash_algorithm|
|
51
|
+
next unless record.include?(hash_key)
|
52
|
+
record[hash_key] = filter_anonymize_record(record[hash_key], hash_algorithm)
|
53
|
+
end
|
51
54
|
filter_record(tag, time, record)
|
52
55
|
Fluent::Engine.emit(tag, time, record)
|
53
56
|
end
|
54
57
|
chain.next
|
55
58
|
end
|
56
59
|
|
57
|
-
def filter_anonymize_record(
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
else
|
63
|
-
record[hash_key] = anonymize(record[hash_key], hash_algorithm, @hash_salt)
|
64
|
-
end
|
60
|
+
def filter_anonymize_record(data, hash_algorithm)
|
61
|
+
if data.is_a?(Array)
|
62
|
+
data = data.collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
|
63
|
+
else
|
64
|
+
data = anonymize(data, hash_algorithm, @hash_salt)
|
65
65
|
end
|
66
|
-
|
66
|
+
data
|
67
67
|
end
|
68
68
|
|
69
69
|
def anonymize(message, algorithm, salt)
|
@@ -26,6 +26,13 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
26
26
|
add_tag_prefix anonymized.
|
27
27
|
]
|
28
28
|
|
29
|
+
CONFIG_NEST_VALUE = %[
|
30
|
+
sha1_keys array,hash
|
31
|
+
ipv4_mask_keys host
|
32
|
+
remove_tag_prefix input.
|
33
|
+
add_tag_prefix anonymized.
|
34
|
+
]
|
35
|
+
|
29
36
|
def create_driver(conf=CONFIG,tag='test')
|
30
37
|
Fluent::Test::OutputTestDriver.new(Fluent::AnonymizerOutput, tag).configure(conf)
|
31
38
|
end
|
@@ -87,4 +94,22 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
87
94
|
assert_equal 'ce164718b94212332187eb8420903b46b334d609', emits[0][2]['telephone']
|
88
95
|
assert_equal 'signup', emits[0][2]['action']
|
89
96
|
end
|
97
|
+
|
98
|
+
def test_emit_nest_value
|
99
|
+
d1 = create_driver(CONFIG_NEST_VALUE, 'input.access')
|
100
|
+
d1.run do
|
101
|
+
d1.emit({
|
102
|
+
'host' => '10.102.3.80',
|
103
|
+
'array' => ['1000', '2000'],
|
104
|
+
'hash' => {'foo' => '1000', 'bar' => '2000'},
|
105
|
+
})
|
106
|
+
end
|
107
|
+
emits = d1.emits
|
108
|
+
assert_equal 1, emits.length
|
109
|
+
p emits[0]
|
110
|
+
assert_equal 'anonymized.access', emits[0][0] # tag
|
111
|
+
assert_equal '10.102.3.0', emits[0][2]['host']
|
112
|
+
assert_equal ["e3cbba8883fe746c6e35783c9404b4bc0c7ee9eb", "a4ac914c09d7c097fe1f4f96b897e625b6922069"], emits[0][2]['array']
|
113
|
+
assert_equal '1a1903d78aed9403649d61cb21ba6b489249761b', emits[0][2]['hash']
|
114
|
+
end
|
90
115
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-anonymizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -100,8 +100,9 @@ rubyforge_project:
|
|
100
100
|
rubygems_version: 1.8.23
|
101
101
|
signing_key:
|
102
102
|
specification_version: 3
|
103
|
-
summary: Fluentd filter output plugin to anonymize records
|
104
|
-
protects privacy data such as
|
103
|
+
summary: Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512
|
104
|
+
algorithms. This data masking plugin protects privacy data such as ID, email, phone
|
105
|
+
number, IP address and so on.
|
105
106
|
test_files:
|
106
107
|
- test/helper.rb
|
107
108
|
- test/plugin/test_out_anonymizer.rb
|