fluent-plugin-anonymizer 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +8 -4
- data/fluent-plugin-anonymizer.gemspec +2 -2
- data/lib/fluent/plugin/out_anonymizer.rb +10 -10
- data/test/plugin/test_out_anonymizer.rb +25 -0
- metadata +5 -4
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
## Overview
|
4
4
|
|
5
|
-
Fluentd filter output plugin to anonymize records. This data masking plugin protects privacy data such as
|
5
|
+
Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IP address and so on.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -27,7 +27,7 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
|
|
27
27
|
</source>
|
28
28
|
|
29
29
|
<match test.message>
|
30
|
-
type
|
30
|
+
type anonymizer
|
31
31
|
sha1_keys user_id, member_id, mail
|
32
32
|
ipv4_mask_keys host
|
33
33
|
ipv4_mask_subnet 24
|
@@ -46,10 +46,10 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
|
|
46
46
|
$ echo '{"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
|
47
47
|
|
48
48
|
$ tail -f /var/log/td-agent/td-agent.log
|
49
|
-
2013-11-19 18:30:21 +0900 anonymized.message: {"host":"10.102.
|
49
|
+
2013-11-19 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
|
50
50
|
`````
|
51
51
|
|
52
|
-
|
52
|
+
## Parameters
|
53
53
|
|
54
54
|
* `md5_keys` `sha1_keys` `sha256_keys` `sha384_keys` `sha512_keys`
|
55
55
|
|
@@ -83,6 +83,10 @@ Add original tag name into filtered record using SetTagKeyMixin function.
|
|
83
83
|
|
84
84
|
Edit tag format using HandleTagNameMixin function.
|
85
85
|
|
86
|
+
## Notes
|
87
|
+
|
88
|
+
* The behavior when hashing nested values is compatible with [LogStash::Filters::Anonymize](https://github.com/logstash/logstash/blob/master/lib/logstash/filters/anonymize.rb). For further details, please check out the test code at [test_emit_nest_value](https://github.com/y-ken/fluent-plugin-anonymizer/blob/master/test/plugin/test_out_anonymizer.rb#L98).
|
89
|
+
|
86
90
|
## Blog Articles
|
87
91
|
|
88
92
|
* http://y-ken.hatenablog.com/entry/fluent-plugin-anonymizer-has-released
|
@@ -4,10 +4,10 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "fluent-plugin-anonymizer"
|
7
|
-
spec.version = "0.0.
|
7
|
+
spec.version = "0.0.2"
|
8
8
|
spec.authors = ["Kentaro Yoshida"]
|
9
9
|
spec.email = ["y.ken.studio@gmail.com"]
|
10
|
-
spec.summary = %q{Fluentd filter output plugin to anonymize records. This data masking plugin protects privacy data such as
|
10
|
+
spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IP address and so on.}
|
11
11
|
spec.homepage = "https://github.com/y-ken/fluent-plugin-anonymizer"
|
12
12
|
spec.license = "Apache License, Version 2.0"
|
13
13
|
|
@@ -47,23 +47,23 @@ class Fluent::AnonymizerOutput < Fluent::Output
|
|
47
47
|
|
48
48
|
def emit(tag, es, chain)
|
49
49
|
es.each do |time, record|
|
50
|
-
|
50
|
+
@hash_keys.each do |hash_key, hash_algorithm|
|
51
|
+
next unless record.include?(hash_key)
|
52
|
+
record[hash_key] = filter_anonymize_record(record[hash_key], hash_algorithm)
|
53
|
+
end
|
51
54
|
filter_record(tag, time, record)
|
52
55
|
Fluent::Engine.emit(tag, time, record)
|
53
56
|
end
|
54
57
|
chain.next
|
55
58
|
end
|
56
59
|
|
57
|
-
def filter_anonymize_record(
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
else
|
63
|
-
record[hash_key] = anonymize(record[hash_key], hash_algorithm, @hash_salt)
|
64
|
-
end
|
60
|
+
def filter_anonymize_record(data, hash_algorithm)
|
61
|
+
if data.is_a?(Array)
|
62
|
+
data = data.collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
|
63
|
+
else
|
64
|
+
data = anonymize(data, hash_algorithm, @hash_salt)
|
65
65
|
end
|
66
|
-
|
66
|
+
data
|
67
67
|
end
|
68
68
|
|
69
69
|
def anonymize(message, algorithm, salt)
|
@@ -26,6 +26,13 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
26
26
|
add_tag_prefix anonymized.
|
27
27
|
]
|
28
28
|
|
29
|
+
CONFIG_NEST_VALUE = %[
|
30
|
+
sha1_keys array,hash
|
31
|
+
ipv4_mask_keys host
|
32
|
+
remove_tag_prefix input.
|
33
|
+
add_tag_prefix anonymized.
|
34
|
+
]
|
35
|
+
|
29
36
|
def create_driver(conf=CONFIG,tag='test')
|
30
37
|
Fluent::Test::OutputTestDriver.new(Fluent::AnonymizerOutput, tag).configure(conf)
|
31
38
|
end
|
@@ -87,4 +94,22 @@ class AnonymizerOutputTest < Test::Unit::TestCase
|
|
87
94
|
assert_equal 'ce164718b94212332187eb8420903b46b334d609', emits[0][2]['telephone']
|
88
95
|
assert_equal 'signup', emits[0][2]['action']
|
89
96
|
end
|
97
|
+
|
98
|
+
def test_emit_nest_value
|
99
|
+
d1 = create_driver(CONFIG_NEST_VALUE, 'input.access')
|
100
|
+
d1.run do
|
101
|
+
d1.emit({
|
102
|
+
'host' => '10.102.3.80',
|
103
|
+
'array' => ['1000', '2000'],
|
104
|
+
'hash' => {'foo' => '1000', 'bar' => '2000'},
|
105
|
+
})
|
106
|
+
end
|
107
|
+
emits = d1.emits
|
108
|
+
assert_equal 1, emits.length
|
109
|
+
p emits[0]
|
110
|
+
assert_equal 'anonymized.access', emits[0][0] # tag
|
111
|
+
assert_equal '10.102.3.0', emits[0][2]['host']
|
112
|
+
assert_equal ["e3cbba8883fe746c6e35783c9404b4bc0c7ee9eb", "a4ac914c09d7c097fe1f4f96b897e625b6922069"], emits[0][2]['array']
|
113
|
+
assert_equal '1a1903d78aed9403649d61cb21ba6b489249761b', emits[0][2]['hash']
|
114
|
+
end
|
90
115
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-anonymizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -100,8 +100,9 @@ rubyforge_project:
|
|
100
100
|
rubygems_version: 1.8.23
|
101
101
|
signing_key:
|
102
102
|
specification_version: 3
|
103
|
-
summary: Fluentd filter output plugin to anonymize records
|
104
|
-
protects privacy data such as
|
103
|
+
summary: Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512
|
104
|
+
algorithms. This data masking plugin protects privacy data such as ID, email, phone
|
105
|
+
number, IP address and so on.
|
105
106
|
test_files:
|
106
107
|
- test/helper.rb
|
107
108
|
- test/plugin/test_out_anonymizer.rb
|