fluent-plugin-anonymizer 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  ## Overview
4
4
 
5
- Fluentd filter output plugin to anonymize records. This data masking plugin protects privacy data such as IP address, ID, email, phone number and so on.
5
+ Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IP address and so on.
6
6
 
7
7
  ## Installation
8
8
 
@@ -27,7 +27,7 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
27
27
  </source>
28
28
 
29
29
  <match test.message>
30
- type anonymize
30
+ type anonymizer
31
31
  sha1_keys user_id, member_id, mail
32
32
  ipv4_mask_keys host
33
33
  ipv4_mask_subnet 24
@@ -46,10 +46,10 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
46
46
  $ echo '{"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
47
47
 
48
48
  $ tail -f /var/log/td-agent/td-agent.log
49
- 2013-11-19 18:30:21 +0900 anonymized.message: {"host":"10.102.0.0","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
49
+ 2013-11-19 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
50
50
  `````
51
51
 
52
- ### Params
52
+ ## Parameters
53
53
 
54
54
  * `md5_keys` `sha1_keys` `sha256_keys` `sha384_keys` `sha512_keys`
55
55
 
@@ -83,6 +83,10 @@ Add original tag name into filtered record using SetTagKeyMixin function.
83
83
 
84
84
  Edit tag format using HandleTagNameMixin function.
85
85
 
86
+ ## Notes
87
+
88
+ * The hashing behavior for nested values is compatible with that of [LogStash::Filters::Anonymize](https://github.com/logstash/logstash/blob/master/lib/logstash/filters/anonymize.rb). For further details, please check out the test code at [test_emit_nest_value](https://github.com/y-ken/fluent-plugin-anonymizer/blob/master/test/plugin/test_out_anonymizer.rb#L98).
89
+
86
90
  ## Blog Articles
87
91
 
88
92
  * http://y-ken.hatenablog.com/entry/fluent-plugin-anonymizer-has-released
@@ -4,10 +4,10 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-anonymizer"
7
- spec.version = "0.0.1"
7
+ spec.version = "0.0.2"
8
8
  spec.authors = ["Kentaro Yoshida"]
9
9
  spec.email = ["y.ken.studio@gmail.com"]
10
- spec.summary = %q{Fluentd filter output plugin to anonymize records. This data masking plugin protects privacy data such as IP address, ID, email, phone number and so on.}
10
+ spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IP address and so on.}
11
11
  spec.homepage = "https://github.com/y-ken/fluent-plugin-anonymizer"
12
12
  spec.license = "Apache License, Version 2.0"
13
13
 
@@ -47,23 +47,23 @@ class Fluent::AnonymizerOutput < Fluent::Output
47
47
 
48
48
  def emit(tag, es, chain)
49
49
  es.each do |time, record|
50
- record = filter_anonymize_record(record)
50
+ @hash_keys.each do |hash_key, hash_algorithm|
51
+ next unless record.include?(hash_key)
52
+ record[hash_key] = filter_anonymize_record(record[hash_key], hash_algorithm)
53
+ end
51
54
  filter_record(tag, time, record)
52
55
  Fluent::Engine.emit(tag, time, record)
53
56
  end
54
57
  chain.next
55
58
  end
56
59
 
57
- def filter_anonymize_record(record)
58
- @hash_keys.each do |hash_key, hash_algorithm|
59
- next unless record.include?(hash_key)
60
- if record[hash_key].is_a?(Array)
61
- record[hash_key] = record[hash_key].collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
62
- else
63
- record[hash_key] = anonymize(record[hash_key], hash_algorithm, @hash_salt)
64
- end
60
+ def filter_anonymize_record(data, hash_algorithm)
61
+ if data.is_a?(Array)
62
+ data = data.collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
63
+ else
64
+ data = anonymize(data, hash_algorithm, @hash_salt)
65
65
  end
66
- return record
66
+ data
67
67
  end
68
68
 
69
69
  def anonymize(message, algorithm, salt)
@@ -26,6 +26,13 @@ class AnonymizerOutputTest < Test::Unit::TestCase
26
26
  add_tag_prefix anonymized.
27
27
  ]
28
28
 
29
+ CONFIG_NEST_VALUE = %[
30
+ sha1_keys array,hash
31
+ ipv4_mask_keys host
32
+ remove_tag_prefix input.
33
+ add_tag_prefix anonymized.
34
+ ]
35
+
29
36
  def create_driver(conf=CONFIG,tag='test')
30
37
  Fluent::Test::OutputTestDriver.new(Fluent::AnonymizerOutput, tag).configure(conf)
31
38
  end
@@ -87,4 +94,22 @@ class AnonymizerOutputTest < Test::Unit::TestCase
87
94
  assert_equal 'ce164718b94212332187eb8420903b46b334d609', emits[0][2]['telephone']
88
95
  assert_equal 'signup', emits[0][2]['action']
89
96
  end
97
+
98
+ def test_emit_nest_value
99
+ d1 = create_driver(CONFIG_NEST_VALUE, 'input.access')
100
+ d1.run do
101
+ d1.emit({
102
+ 'host' => '10.102.3.80',
103
+ 'array' => ['1000', '2000'],
104
+ 'hash' => {'foo' => '1000', 'bar' => '2000'},
105
+ })
106
+ end
107
+ emits = d1.emits
108
+ assert_equal 1, emits.length
109
+ p emits[0]
110
+ assert_equal 'anonymized.access', emits[0][0] # tag
111
+ assert_equal '10.102.3.0', emits[0][2]['host']
112
+ assert_equal ["e3cbba8883fe746c6e35783c9404b4bc0c7ee9eb", "a4ac914c09d7c097fe1f4f96b897e625b6922069"], emits[0][2]['array']
113
+ assert_equal '1a1903d78aed9403649d61cb21ba6b489249761b', emits[0][2]['hash']
114
+ end
90
115
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-anonymizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-11-19 00:00:00.000000000 Z
12
+ date: 2013-11-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -100,8 +100,9 @@ rubyforge_project:
100
100
  rubygems_version: 1.8.23
101
101
  signing_key:
102
102
  specification_version: 3
103
- summary: Fluentd filter output plugin to anonymize records. This data masking plugin
104
- protects privacy data such as IP address, ID, email, phone number and so on.
103
+ summary: Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512
104
+ algorithms. This data masking plugin protects privacy data such as ID, email, phone
105
+ number, IP address and so on.
105
106
  test_files:
106
107
  - test/helper.rb
107
108
  - test/plugin/test_out_anonymizer.rb