fluent-plugin-anonymizer 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  ## Overview
4
4
 
5
- Fluentd filter output plugin to anonymize records. This data masking plugin protects privacy data such as IP address, ID, email, phone number and so on.
5
+ Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IP address and so on.
6
6
 
7
7
  ## Installation
8
8
 
@@ -27,7 +27,7 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
27
27
  </source>
28
28
 
29
29
  <match test.message>
30
- type anonymize
30
+ type anonymizer
31
31
  sha1_keys user_id, member_id, mail
32
32
  ipv4_mask_keys host
33
33
  ipv4_mask_subnet 24
@@ -46,10 +46,10 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
46
46
  $ echo '{"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
47
47
 
48
48
  $ tail -f /var/log/td-agent/td-agent.log
49
- 2013-11-19 18:30:21 +0900 anonymized.message: {"host":"10.102.0.0","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
49
+ 2013-11-19 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
50
50
  `````
51
51
 
52
- ### Params
52
+ ## Parameters
53
53
 
54
54
  * `md5_keys` `sha1_keys` `sha256_keys` `sha384_keys` `sha512_keys`
55
55
 
@@ -83,6 +83,10 @@ Add original tag name into filtered record using SetTagKeyMixin function.
83
83
 
84
84
  Edit tag format using HandleTagNameMixin function.
85
85
 
86
+ ## Notes
87
+
88
+ * The hashing behavior for nested values is compatible with that of [LogStash::Filters::Anonymize](https://github.com/logstash/logstash/blob/master/lib/logstash/filters/anonymize.rb). For further details, please check out the test code at [test_emit_nest_value](https://github.com/y-ken/fluent-plugin-anonymizer/blob/master/test/plugin/test_out_anonymizer.rb#L98).
89
+
86
90
  ## Blog Articles
87
91
 
88
92
  * http://y-ken.hatenablog.com/entry/fluent-plugin-anonymizer-has-released
@@ -4,10 +4,10 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-anonymizer"
7
- spec.version = "0.0.1"
7
+ spec.version = "0.0.2"
8
8
  spec.authors = ["Kentaro Yoshida"]
9
9
  spec.email = ["y.ken.studio@gmail.com"]
10
- spec.summary = %q{Fluentd filter output plugin to anonymize records. This data masking plugin protects privacy data such as IP address, ID, email, phone number and so on.}
10
+ spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IP address and so on.}
11
11
  spec.homepage = "https://github.com/y-ken/fluent-plugin-anonymizer"
12
12
  spec.license = "Apache License, Version 2.0"
13
13
 
@@ -47,23 +47,23 @@ class Fluent::AnonymizerOutput < Fluent::Output
47
47
 
48
48
  def emit(tag, es, chain)
49
49
  es.each do |time, record|
50
- record = filter_anonymize_record(record)
50
+ @hash_keys.each do |hash_key, hash_algorithm|
51
+ next unless record.include?(hash_key)
52
+ record[hash_key] = filter_anonymize_record(record[hash_key], hash_algorithm)
53
+ end
51
54
  filter_record(tag, time, record)
52
55
  Fluent::Engine.emit(tag, time, record)
53
56
  end
54
57
  chain.next
55
58
  end
56
59
 
57
- def filter_anonymize_record(record)
58
- @hash_keys.each do |hash_key, hash_algorithm|
59
- next unless record.include?(hash_key)
60
- if record[hash_key].is_a?(Array)
61
- record[hash_key] = record[hash_key].collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
62
- else
63
- record[hash_key] = anonymize(record[hash_key], hash_algorithm, @hash_salt)
64
- end
60
+ def filter_anonymize_record(data, hash_algorithm)
61
+ if data.is_a?(Array)
62
+ data = data.collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
63
+ else
64
+ data = anonymize(data, hash_algorithm, @hash_salt)
65
65
  end
66
- return record
66
+ data
67
67
  end
68
68
 
69
69
  def anonymize(message, algorithm, salt)
@@ -26,6 +26,13 @@ class AnonymizerOutputTest < Test::Unit::TestCase
26
26
  add_tag_prefix anonymized.
27
27
  ]
28
28
 
29
+ CONFIG_NEST_VALUE = %[
30
+ sha1_keys array,hash
31
+ ipv4_mask_keys host
32
+ remove_tag_prefix input.
33
+ add_tag_prefix anonymized.
34
+ ]
35
+
29
36
  def create_driver(conf=CONFIG,tag='test')
30
37
  Fluent::Test::OutputTestDriver.new(Fluent::AnonymizerOutput, tag).configure(conf)
31
38
  end
@@ -87,4 +94,22 @@ class AnonymizerOutputTest < Test::Unit::TestCase
87
94
  assert_equal 'ce164718b94212332187eb8420903b46b334d609', emits[0][2]['telephone']
88
95
  assert_equal 'signup', emits[0][2]['action']
89
96
  end
97
+
98
+ def test_emit_nest_value
99
+ d1 = create_driver(CONFIG_NEST_VALUE, 'input.access')
100
+ d1.run do
101
+ d1.emit({
102
+ 'host' => '10.102.3.80',
103
+ 'array' => ['1000', '2000'],
104
+ 'hash' => {'foo' => '1000', 'bar' => '2000'},
105
+ })
106
+ end
107
+ emits = d1.emits
108
+ assert_equal 1, emits.length
109
+ p emits[0]
110
+ assert_equal 'anonymized.access', emits[0][0] # tag
111
+ assert_equal '10.102.3.0', emits[0][2]['host']
112
+ assert_equal ["e3cbba8883fe746c6e35783c9404b4bc0c7ee9eb", "a4ac914c09d7c097fe1f4f96b897e625b6922069"], emits[0][2]['array']
113
+ assert_equal '1a1903d78aed9403649d61cb21ba6b489249761b', emits[0][2]['hash']
114
+ end
90
115
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-anonymizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-11-19 00:00:00.000000000 Z
12
+ date: 2013-11-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -100,8 +100,9 @@ rubyforge_project:
100
100
  rubygems_version: 1.8.23
101
101
  signing_key:
102
102
  specification_version: 3
103
- summary: Fluentd filter output plugin to anonymize records. This data masking plugin
104
- protects privacy data such as IP address, ID, email, phone number and so on.
103
+ summary: Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512
104
+ algorithms. This data masking plugin protects privacy data such as ID, email, phone
105
+ number, IP address and so on.
105
106
  test_files:
106
107
  - test/helper.rb
107
108
  - test/plugin/test_out_anonymizer.rb