fluent-plugin-anonymizer 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.travis.yml CHANGED
@@ -1,6 +1,7 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
+ - 2.2
4
5
  - 2.1
5
6
  - 2.0.0
6
7
  - 1.9.3
data/README.md CHANGED
@@ -18,6 +18,8 @@ gem install fluent-plugin-anonymizer
18
18
 
19
19
  ## Tutorial
20
20
 
21
+ ### Output Plugin
22
+
21
23
  #### configuration
22
24
 
23
25
  This sample hashes the `user_id`, `member_id` and `mail` fields of each record with SHA1. IP addresses are auto-detected as IPv4 or IPv6 and rounded with a 24-bit (IPv4) or 104-bit (IPv6) netmask, using the `ipaddr_mask_keys`, `ipv4_mask_subnet` and `ipv6_mask_subnet` options.
@@ -63,6 +65,46 @@ $ tail -f /var/log/td-agent/td-agent.log
63
65
  2014-01-06 18:30:22 +0900 anonymized.message: {"host":"2001:db8:0:8d3:0:8a2e::","member_id":"61f6c1b5f19e0a7f73dd52a23534085bf01f2c67","mail":"eeb890d74b8c1c4cd1e35a3ea62166e0b770f4f4"}
64
66
  `````
65
67
 
68
+ ### Filter Plugin
69
+
70
+ #### configuration
71
+
72
+ ```text
73
+ <source>
74
+ type dummy
75
+ tag raw.dummy
76
+ dummy [
77
+ {"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"},
78
+ {"host":"2001:db8:0:8d3:0:8a2e::","member_id":"61f6c1b5f19e0a7f73dd52a23534085bf01f2c67","mail":"eeb890d74b8c1c4cd1e35a3ea62166e0b770f4f4"}
79
+ ]
80
+ </source>
81
+
82
+ <filter raw.**>
83
+ type anonymizer
84
+
85
+ # Specify the keys to hash, separated by commas
86
+ sha1_keys user_id, member_id, mail
87
+
88
+ # Set the hash salt to any string for better security
89
+ hash_salt mysaltstring
90
+
91
+ # Specify the IP address keys to round (comma-separated) and the subnet masks
92
+ ipaddr_mask_keys host
93
+ ipv4_mask_subnet 24
94
+ ipv6_mask_subnet 104
95
+ </filter>
96
+
97
+ <match raw.**>
98
+ type stdout
99
+ </match>
100
+ ```
101
+
102
+ #### result
103
+
104
+ ```text
105
+ $ fluentd -c fluent.conf
106
+ ```
107
+
66
108
  ## Parameters
67
109
 
68
110
  * `md5_keys` `sha1_keys` `sha256_keys` `sha384_keys` `sha512_keys`
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-anonymizer"
7
- spec.version = "0.3.0"
7
+ spec.version = "0.4.0"
8
8
  spec.authors = ["Kentaro Yoshida"]
9
9
  spec.email = ["y.ken.studio@gmail.com"]
10
10
  spec.summary = %q{Fluentd filter output plugin to anonymize records with HMAC of MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as UserID, Email, Phone number, IPv4/IPv6 address and so on.}
@@ -18,6 +18,7 @@ Gem::Specification.new do |spec|
18
18
 
19
19
  spec.add_development_dependency "bundler"
20
20
  spec.add_development_dependency "rake"
21
+ spec.add_development_dependency "test-unit", "~> 3"
21
22
  spec.add_runtime_dependency "fluentd"
22
23
  spec.add_runtime_dependency "fluent-mixin-rewrite-tag-name"
23
24
  end
@@ -0,0 +1,98 @@
1
+ require 'openssl'
2
+ require 'ipaddr'
3
+
4
module Fluent
  # Anonymization engine built from a plugin instance and its configuration.
  #
  # Every configuration entry whose name ends in `_keys` is read as
  # `<algorithm>_keys`; its value is a comma separated list of record keys.
  # A record key may be a dotted path (`a.b.c`) addressing a nested hash.
  # Hash algorithms replace the value with its HMAC hex digest keyed by the
  # plugin's `hash_salt`; `ipaddr_mask` replaces it with the masked address.
  class Anonymizer

    attr_reader :log

    # Algorithm names accepted as the prefix of a `<name>_keys` setting.
    HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512 ipaddr_mask).freeze

    # Factories returning a new OpenSSL digest object on every call.
    DIGEST = {
      "md5" => Proc.new { OpenSSL::Digest.new('md5') },
      "sha1" => Proc.new { OpenSSL::Digest.new('sha1') },
      "sha256" => Proc.new { OpenSSL::Digest.new('sha256') },
      "sha384" => Proc.new { OpenSSL::Digest.new('sha384') },
      "sha512" => Proc.new { OpenSSL::Digest.new('sha512') }
    }.freeze

    # Suffix that marks a configuration entry as an anonymize rule.
    KEYS_SUFFIX = /_keys$/

    # @param plugin [#log, #hash_salt, #ipv4_mask_subnet, #ipv6_mask_subnet]
    #   plugin instance; when it is a Fluent::Output it must also answer
    #   #tag, #remove_tag_prefix, #remove_tag_suffix, #add_tag_prefix and
    #   #add_tag_suffix.
    # @param conf [Hash] plugin configuration (string keys, string values).
    # @raise [Fluent::ConfigError] on an unsupported `*_keys` entry, when no
    #   rule is configured, or when an output plugin has no tag option set.
    def initialize(plugin, conf)
      @log = plugin.log
      @hash_salt = plugin.hash_salt
      @ipv4_mask_subnet = plugin.ipv4_mask_subnet
      @ipv6_mask_subnet = plugin.ipv6_mask_subnet

      @hash_keys = parse_hash_keys(conf)
      if @hash_keys.empty?
        raise Fluent::ConfigError, "anonymizer: missing hash keys setting."
      end
      log.info "anonymizer: adding anonymize rules for each field. #{@hash_keys}"

      # Only output plugins re-emit events, so only they need a tag option.
      if plugin.is_a?(Fluent::Output) && !have_tag_option?(plugin)
        raise Fluent::ConfigError, "anonymizer: missing remove_tag_prefix, remove_tag_suffix, add_tag_prefix or add_tag_suffix."
      end
    end

    # Applies every configured rule to the record.
    #
    # @param record [Hash] event record; it is modified in place.
    # @return [Hash] the same record.
    def anonymize(record)
      @hash_keys.each do |hash_key, hash_algorithm|
        record = anonymize_record(record, hash_key, hash_algorithm)
      end
      record
    end

    private

    # Builds the rule table: { ["path", "segments"] => "algorithm" }.
    def parse_hash_keys(conf)
      rules = {}
      conf.keys.select { |k| k =~ KEYS_SUFFIX }.each do |key|
        # Strip the suffix that was matched above, not the first "_keys"
        # found anywhere in the name.
        hash_algorithm_name = key.sub(KEYS_SUFFIX, '')
        unless HASH_ALGORITHM.include?(hash_algorithm_name)
          raise Fluent::ConfigError, "anonymizer: unsupported key #{hash_algorithm_name}"
        end
        conf[key].gsub(' ', '').split(',').each do |record_key|
          rules.store(record_key.split('.'), hash_algorithm_name)
        end
      end
      rules
    end

    # Walks the key path and anonymizes the value found at its end.
    # Records that lack the path are returned untouched.
    def anonymize_record(record, key, hash_algorithm)
      # An intermediate value on a dotted path may be a String, nil, Array...
      # Such a value cannot be descended into, so it is left as it is instead
      # of raising NoMethodError and aborting the whole event.
      return record unless record.respond_to?(:has_key?)

      head = key.first
      return record unless record.has_key?(head)

      record[head] =
        if key.size == 1
          anonymize_values(record[head], hash_algorithm)
        else
          anonymize_record(record[head], key[1..-1], hash_algorithm)
        end
      record
    end

    # Anonymizes a scalar, or each element of an Array. Best effort: when
    # anything raises, the error is logged and the ORIGINAL data is returned
    # unchanged (for an Array, none of its elements are replaced).
    def anonymize_values(data, hash_algorithm)
      if data.is_a?(Array)
        data.map { |v| anonymize_value(v, hash_algorithm, @hash_salt) }
      else
        anonymize_value(data, hash_algorithm, @hash_salt)
      end
    rescue => e
      log.error "anonymizer: failed to anonymize record. :message=>#{e.message} :data=>#{data}"
      log.error e.backtrace.join("\n")
      data
    end

    # Returns the HMAC hex digest of message.to_s, or the masked IP address.
    def anonymize_value(message, algorithm, salt)
      case algorithm
      when 'md5','sha1','sha256','sha384','sha512'
        OpenSSL::HMAC.hexdigest(DIGEST[algorithm].call, salt, message.to_s)
      when 'ipaddr_mask'
        address = IPAddr.new(message)
        subnet = address.ipv4? ? @ipv4_mask_subnet : @ipv6_mask_subnet
        address.mask(subnet).to_s
      else
        # Not reachable through initialize, which rejects unknown algorithms.
        log.warn "anonymizer: unknown algorithm #{algorithm} has called."
      end
    end

    # Truthy when the plugin has at least one tag rewriting option set.
    def have_tag_option?(plugin)
      plugin.tag ||
        plugin.remove_tag_prefix || plugin.remove_tag_suffix ||
        plugin.add_tag_prefix || plugin.add_tag_suffix
    end
  end
end
@@ -0,0 +1,26 @@
1
module Fluent
  # Filter plugin registered as `anonymizer`. It hands every record to a
  # Fluent::Anonymizer built from this plugin's configuration and returns
  # the anonymized record.
  class AnonymizerFilter < Filter
    Plugin.register_filter('anonymizer', self)

    # `tag` is declared here but not read anywhere in this class.
    config_param :tag, :string, :default => nil
    # Salt handed to the anonymizer as the HMAC key.
    config_param :hash_salt, :string, :default => ''
    # Prefix lengths used when masking IPv4 / IPv6 addresses.
    config_param :ipv4_mask_subnet, :integer, :default => 24
    config_param :ipv6_mask_subnet, :integer, :default => 104

    config_set_default :include_tag_key, false

    def initialize
      super
      # Loaded when the plugin is instantiated rather than at file load time.
      require 'fluent/plugin/anonymizer'
    end

    # Builds the anonymizer from the parsed configuration.
    # Fluent::Anonymizer#initialize raises Fluent::ConfigError when the
    # `*_keys` settings are missing or unsupported.
    def configure(conf)
      super
      @anonymizer = Fluent::Anonymizer.new(self, conf)
    end

    # @param tag [String] event tag (unused)
    # @param time [Object] event time (unused)
    # @param record [Hash] event record, anonymized in place
    # @return [Hash] the anonymized record
    def filter(tag, time, record)
      @anonymizer.anonymize(record)
    end
  end
end
@@ -8,7 +8,6 @@ class Fluent::AnonymizerOutput < Fluent::Output
8
8
  define_method(:log) { $log }
9
9
  end
10
10
 
11
- HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512 ipaddr_mask)
12
11
  config_param :tag, :string, :default => nil
13
12
  config_param :hash_salt, :string, :default => ''
14
13
  config_param :ipv4_mask_subnet, :integer, :default => 24
@@ -19,90 +18,24 @@ class Fluent::AnonymizerOutput < Fluent::Output
19
18
  include Fluent::SetTagKeyMixin
20
19
  config_set_default :include_tag_key, false
21
20
 
22
- DIGEST = {
23
- "md5" => Proc.new { OpenSSL::Digest.new('md5') },
24
- "sha1" => Proc.new { OpenSSL::Digest.new('sha1') },
25
- "sha256" => Proc.new { OpenSSL::Digest.new('sha256') },
26
- "sha384" => Proc.new { OpenSSL::Digest.new('sha384') },
27
- "sha512" => Proc.new { OpenSSL::Digest.new('sha512') }
28
- }
29
-
30
21
  def initialize
31
- require 'openssl'
32
- require 'ipaddr'
22
+ require 'fluent/plugin/anonymizer'
33
23
  super
34
24
  end
35
25
 
36
26
  def configure(conf)
37
27
  super
38
-
39
- @hash_keys = Hash.new
40
- conf.keys.select{|k| k =~ /_keys$/}.each do |key|
41
- hash_algorithm_name = key.sub('_keys','')
42
- raise Fluent::ConfigError, "anonymizer: unsupported key #{hash_algorithm_name}" unless HASH_ALGORITHM.include?(hash_algorithm_name)
43
- conf[key].gsub(' ', '').split(',').each do |record_key|
44
- @hash_keys.store(record_key.split('.'), hash_algorithm_name)
45
- end
46
- end
47
-
48
- if @hash_keys.count < 1
49
- raise Fluent::ConfigError, "anonymizer: missing hash keys setting."
50
- end
51
- log.info "anonymizer: adding anonymize rules for each field. #{@hash_keys}"
52
-
53
- if ( !@tag && !@remove_tag_prefix && !@remove_tag_suffix && !@add_tag_prefix && !@add_tag_suffix )
54
- raise Fluent::ConfigError, "anonymizer: missing remove_tag_prefix, remove_tag_suffix, add_tag_prefix or add_tag_suffix."
55
- end
28
+ @anonymizer = Fluent::Anonymizer.new(self, conf)
56
29
  end
57
30
 
58
31
  def emit(tag, es, chain)
59
32
  es.each do |time, record|
60
- @hash_keys.each do |hash_key, hash_algorithm|
61
- record = filter_anonymize_record(record, hash_key, hash_algorithm)
62
- end
33
+ record = @anonymizer.anonymize(record)
63
34
  emit_tag = tag.dup
64
35
  filter_record(emit_tag, time, record)
65
36
  Fluent::Engine.emit(emit_tag, time, record)
66
37
  end
67
38
  chain.next
68
39
  end
69
-
70
- def filter_anonymize_record(record, key, hash_algorithm)
71
- if record.has_key?(key.first)
72
- if key.size == 1
73
- record[key.first] = filter_anonymize_value(record[key.first], hash_algorithm)
74
- else
75
- record[key.first] = filter_anonymize_record(record[key.first], key[1..-1], hash_algorithm)
76
- end
77
- end
78
- return record
79
- end
80
-
81
- def filter_anonymize_value(data, hash_algorithm)
82
- begin
83
- if data.is_a?(Array)
84
- data = data.collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
85
- else
86
- data = anonymize(data, hash_algorithm, @hash_salt)
87
- end
88
- rescue StandardError => e
89
- log.error "anonymizer: failed to anonymize record. :message=>#{e.message} :data=>#{data}"
90
- log.error e.backtrace.join("\n")
91
- end
92
- data
93
- end
94
-
95
- def anonymize(message, algorithm, salt)
96
- case algorithm
97
- when 'md5','sha1','sha256','sha384','sha512'
98
- OpenSSL::HMAC.hexdigest(DIGEST[algorithm].call, salt, message.to_s)
99
- when 'ipaddr_mask'
100
- address = IPAddr.new(message)
101
- subnet = address.ipv4? ? @ipv4_mask_subnet : @ipv6_mask_subnet
102
- address.mask(subnet).to_s
103
- else
104
- log.warn "anonymizer: unknown algorithm #{algorithm} has called."
105
- end
106
- end
107
40
  end
108
41
 
data/test/helper.rb CHANGED
@@ -23,6 +23,7 @@ unless ENV.has_key?('VERBOSE')
23
23
  end
24
24
 
25
25
  require 'fluent/plugin/out_anonymizer'
26
+ require 'fluent/plugin/filter_anonymizer'
26
27
 
27
28
  class Test::Unit::TestCase
28
29
  end
@@ -0,0 +1,172 @@
1
+ require 'helper'
2
+
3
# Tests for Fluent::AnonymizerFilter, driven through Fluent's filter test driver.
class AnonymizerFilterTest < Test::Unit::TestCase
  def setup
    Fluent::Test.setup
    @time = Fluent::Engine.now
  end

  # Default configuration: one key per hash algorithm plus IPv4 masking.
  CONFIG = %[
    md5_keys         data_for_md5
    sha1_keys        data_for_sha1
    sha256_keys      data_for_sha256
    sha384_keys      data_for_sha384
    sha512_keys      data_for_sha512
    hash_salt        test_salt_string
    ipaddr_mask_keys host
    ipv4_mask_subnet 24
  ]

  def create_driver(conf=CONFIG, tag='test')
    Fluent::Test::FilterTestDriver.new(Fluent::AnonymizerFilter, tag).configure(conf)
  end

  # Runs the messages through a freshly configured filter and returns the
  # filtered records (third element of each filtered entry).
  def filter(conf, messages)
    d = create_driver(conf)
    d.run {
      messages.each {|message|
        d.filter(message, @time)
      }
    }
    filtered = d.filtered_as_array
    filtered.map {|m| m[2] }
  end

  def test_configure
    # No *_keys setting at all.
    assert_raise(Fluent::ConfigError) {
      create_driver('')
    }
    # A *_keys setting naming an unsupported algorithm.
    assert_raise(Fluent::ConfigError) {
      create_driver('unknown_keys')
    }
    d = create_driver(CONFIG)
    assert_equal 'test_salt_string', d.instance.config['hash_salt']
  end

  def test_filter
    messages = [
      {
        'host' => '10.102.3.80',
        'data_for_md5' => '12345',
        'data_for_sha1' => '12345',
        'data_for_sha256' => '12345',
        'data_for_sha384' => '12345',
        'data_for_sha512' => '12345'
      }
    ]
    expected = {
      'host' => '10.102.3.0',
      'data_for_md5' => 'e738cbde82a514dc60582cd467c240ed',
      'data_for_sha1' => '69cf099459c06b852ede96d39b710027727d13c6',
      'data_for_sha256' => '804d83b8c6a3e01498d40677652b084333196d8e548ee5a8710fbd0e1e115527',
      'data_for_sha384' => '6c90c389bbdfc210416b9318df3f526b4f218f8a8df3a67020353c35da22dc154460b18f22a8009a747b3ef2975acae7',
      'data_for_sha512' => 'cdbb897e6f3a092161bdb51164eb2996b75b00555f568219628ff15cd2929865d217af5dff9c32ddc908b75a89baec96b3e9a0da120e919f5246de0f1bc54c58'
    }
    filtered = filter(CONFIG, messages)
    assert_equal(expected, filtered[0])
  end

  def test_filter_multi_keys
    conf = %[
      sha1_keys         member_id, mail, telephone
      ipaddr_mask_keys  host, host2
      ipv4_mask_subnet  16
    ]
    messages = [
      {
        'host' => '10.102.3.80',
        'host2' => '10.102.3.80',
        'member_id' => '12345',
        'mail' => 'example@example.com',
        'telephone' => '00-0000-0000',
        'action' => 'signup'
      }
    ]
    expected = {
      'host' => '10.102.0.0',
      'host2' => '10.102.0.0',
      'member_id' => '774472f0dc892f0b3299cae8dadacd0a74ba59d7',
      'mail' => 'd7b728209f5dd8df10cecbced30394c3c7fc2c82',
      'telephone' => 'a67f73c395105a358a03a0f127bf64b5495e7841',
      'action' => 'signup'
    }
    filtered = filter(conf, messages)
    assert_equal(expected, filtered[0])
  end

  def test_filter_nested_keys
    conf = %[
      sha1_keys         nested.data,nested.nested.data
      ipaddr_mask_keys  hosts.host1
      ipv4_mask_subnet  16
    ]
    messages = [
      {
        'hosts' => {
          'host1' => '10.102.3.80',
        },
        'nested' => {
          'data' => '12345',
          'nested' => {
            'data' => '12345'
          }
        }
      }
    ]
    expected = {
      'hosts' => {
        'host1' => '10.102.0.0'
      },
      'nested' => {
        'data' => '774472f0dc892f0b3299cae8dadacd0a74ba59d7',
        'nested' => {
          'data' => '774472f0dc892f0b3299cae8dadacd0a74ba59d7'
        }
      }
    }
    filtered = filter(conf, messages)
    assert_equal(expected, filtered[0])
  end

  # Array values are hashed element by element; a Hash value is hashed as a
  # whole (via its string form).
  def test_filter_nest_value
    conf = %[
      sha1_keys         array,hash
      ipaddr_mask_keys  host
    ]
    messages = [
      {
        'host' => '10.102.3.80',
        'array' => ['1000', '2000'],
        'hash' => {'foo' => '1000', 'bar' => '2000'},
      }
    ]
    expected = {
      'host' => '10.102.3.0',
      'array' => ["c1628fc0d473cb21b15607c10bdcad19d1a42e24", "ea87abc249f9f2d430edb816514bffeffd3e698e"],
      'hash' => '28fe85deb0d1d39ee14c49c62bc4773b0338247b'
    }
    filtered = filter(conf, messages)
    assert_equal(expected, filtered[0])
  end

  def test_filter_ipv6
    conf = %[
      ipaddr_mask_keys  host
      ipv4_mask_subnet  24
      ipv6_mask_subnet  104
    ]
    messages = [
      { 'host' => '10.102.3.80' },
      { 'host' => '0:0:0:0:0:FFFF:129.144.52.38' },
      { 'host' => '2001:db8:0:8d3:0:8a2e:70:7344' }
    ]
    expected = [
      { 'host' => '10.102.3.0' },
      { 'host' => '::ffff:129.0.0.0' },
      { 'host' => '2001:db8:0:8d3:0:8a2e::' }
    ]
    filtered = filter(conf, messages)
    assert_equal(expected, filtered)
  end
end
172
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-anonymizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-12-08 00:00:00.000000000 Z
12
+ date: 2015-11-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -43,6 +43,22 @@ dependencies:
43
43
  - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: test-unit
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '3'
46
62
  - !ruby/object:Gem::Dependency
47
63
  name: fluentd
48
64
  requirement: !ruby/object:Gem::Requirement
@@ -89,8 +105,11 @@ files:
89
105
  - README.md
90
106
  - Rakefile
91
107
  - fluent-plugin-anonymizer.gemspec
108
+ - lib/fluent/plugin/anonymizer.rb
109
+ - lib/fluent/plugin/filter_anonymizer.rb
92
110
  - lib/fluent/plugin/out_anonymizer.rb
93
111
  - test/helper.rb
112
+ - test/plugin/test_filter_anonymizer.rb
94
113
  - test/plugin/test_out_anonymizer.rb
95
114
  homepage: https://github.com/y-ken/fluent-plugin-anonymizer
96
115
  licenses:
@@ -121,4 +140,5 @@ summary: Fluentd filter output plugin to anonymize records with HMAC of MD5/SHA1
121
140
  Phone number, IPv4/IPv6 address and so on.
122
141
  test_files:
123
142
  - test/helper.rb
143
+ - test/plugin/test_filter_anonymizer.rb
124
144
  - test/plugin/test_out_anonymizer.rb