fluent-plugin-anonymizer 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml CHANGED
@@ -1,6 +1,7 @@
1
1
  language: ruby
2
2
 
3
3
  rvm:
4
+ - 2.2
4
5
  - 2.1
5
6
  - 2.0.0
6
7
  - 1.9.3
data/README.md CHANGED
@@ -18,6 +18,8 @@ gem install fluent-plugin-anonymizer
18
18
 
19
19
  ## Tutorial
20
20
 
21
+ ### Output Plugin
22
+
21
23
  #### configuration
22
24
 
23
25
  This sample hashes the `user_id`, `member_id` and `mail` fields with SHA1. IP addresses are auto-detected as IPv4 or IPv6 and masked with a 24-bit (IPv4) or 104-bit (IPv6) netmask, using the `ipaddr_mask_keys`, `ipv4_mask_subnet` and `ipv6_mask_subnet` options.
@@ -63,6 +65,46 @@ $ tail -f /var/log/td-agent/td-agent.log
63
65
  2014-01-06 18:30:22 +0900 anonymized.message: {"host":"2001:db8:0:8d3:0:8a2e::","member_id":"61f6c1b5f19e0a7f73dd52a23534085bf01f2c67","mail":"eeb890d74b8c1c4cd1e35a3ea62166e0b770f4f4"}
64
66
  `````
65
67
 
68
+ ### Filter Plugin
69
+
70
+ #### configuration
71
+
72
+ ```text
73
+ <source>
74
+ type dummy
75
+ tag raw.dummy
76
+ dummy [
77
+ {"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"},
78
+ {"host":"2001:db8:0:8d3:0:8a2e::","member_id":"61f6c1b5f19e0a7f73dd52a23534085bf01f2c67","mail":"eeb890d74b8c1c4cd1e35a3ea62166e0b770f4f4"}
79
+ ]
80
+ </source>
81
+
82
+ <filter raw.**>
83
+ type anonymizer
84
+
85
+ # Specify the keys to hash, separated by commas
86
+ sha1_keys user_id, member_id, mail
87
+
88
+ # Set a hash salt (any string) for better security
89
+ hash_salt mysaltstring
90
+
91
+ # Specify the IP address keys to mask (comma-separated) and the subnet mask lengths
92
+ ipaddr_mask_keys host
93
+ ipv4_mask_subnet 24
94
+ ipv6_mask_subnet 104
95
+ </filter>
96
+
97
+ <match raw.**>
98
+ type stdout
99
+ </match>
100
+ ```
101
+
102
+ #### result
103
+
104
+ ```text
105
+ $ fluentd -c fluent.conf
106
+ ```
107
+
66
108
  ## Parameters
67
109
 
68
110
  * `md5_keys` `sha1_keys` `sha256_keys` `sha384_keys` `sha512_keys`
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-anonymizer"
7
- spec.version = "0.3.0"
7
+ spec.version = "0.4.0"
8
8
  spec.authors = ["Kentaro Yoshida"]
9
9
  spec.email = ["y.ken.studio@gmail.com"]
10
10
  spec.summary = %q{Fluentd filter output plugin to anonymize records with HMAC of MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as UserID, Email, Phone number, IPv4/IPv6 address and so on.}
@@ -18,6 +18,7 @@ Gem::Specification.new do |spec|
18
18
 
19
19
  spec.add_development_dependency "bundler"
20
20
  spec.add_development_dependency "rake"
21
+ spec.add_development_dependency "test-unit", "~> 3"
21
22
  spec.add_runtime_dependency "fluentd"
22
23
  spec.add_runtime_dependency "fluent-mixin-rewrite-tag-name"
23
24
  end
@@ -0,0 +1,98 @@
1
+ require 'openssl'
2
+ require 'ipaddr'
3
+
4
+ module Fluent
5
+ class Anonymizer
6
+
7
+ attr_reader :log
8
+
9
+ HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512 ipaddr_mask)
10
+ DIGEST = {
11
+ "md5" => Proc.new { OpenSSL::Digest.new('md5') },
12
+ "sha1" => Proc.new { OpenSSL::Digest.new('sha1') },
13
+ "sha256" => Proc.new { OpenSSL::Digest.new('sha256') },
14
+ "sha384" => Proc.new { OpenSSL::Digest.new('sha384') },
15
+ "sha512" => Proc.new { OpenSSL::Digest.new('sha512') }
16
+ }
17
+
18
+ def initialize(plugin, conf)
19
+ @log = plugin.log
20
+ @hash_salt = plugin.hash_salt
21
+ @ipv4_mask_subnet = plugin.ipv4_mask_subnet
22
+ @ipv6_mask_subnet = plugin.ipv6_mask_subnet
23
+
24
+ @hash_keys = {}
25
+ conf.keys.select{|k| k =~ /_keys$/}.each do |key|
26
+ hash_algorithm_name = key.sub('_keys','')
27
+ raise Fluent::ConfigError, "anonymizer: unsupported key #{hash_algorithm_name}" unless HASH_ALGORITHM.include?(hash_algorithm_name)
28
+ conf[key].gsub(' ', '').split(',').each do |record_key|
29
+ @hash_keys.store(record_key.split('.'), hash_algorithm_name)
30
+ end
31
+ end
32
+
33
+ if @hash_keys.empty?
34
+ raise Fluent::ConfigError, "anonymizer: missing hash keys setting."
35
+ end
36
+ log.info "anonymizer: adding anonymize rules for each field. #{@hash_keys}"
37
+
38
+ if plugin.is_a?(Fluent::Output)
39
+ unless have_tag_option?(plugin)
40
+ raise Fluent::ConfigError, "anonymizer: missing remove_tag_prefix, remove_tag_suffix, add_tag_prefix or add_tag_suffix."
41
+ end
42
+ end
43
+ end
44
+
45
+ def anonymize(record)
46
+ @hash_keys.each do |hash_key, hash_algorithm|
47
+ record = anonymize_record(record, hash_key, hash_algorithm)
48
+ end
49
+ record
50
+ end
51
+
52
+ private
53
+
54
+ def anonymize_record(record, key, hash_algorithm)
55
+ if record.has_key?(key.first)
56
+ if key.size == 1
57
+ record[key.first] = anonymize_values(record[key.first], hash_algorithm)
58
+ else
59
+ record[key.first] = anonymize_record(record[key.first], key[1..-1], hash_algorithm)
60
+ end
61
+ end
62
+ record
63
+ end
64
+
65
+ def anonymize_values(data, hash_algorithm)
66
+ begin
67
+ if data.is_a?(Array)
68
+ data = data.collect { |v| anonymize_value(v, hash_algorithm, @hash_salt) }
69
+ else
70
+ data = anonymize_value(data, hash_algorithm, @hash_salt)
71
+ end
72
+ rescue => e
73
+ log.error "anonymizer: failed to anonymize record. :message=>#{e.message} :data=>#{data}"
74
+ log.error e.backtrace.join("\n")
75
+ end
76
+ data
77
+ end
78
+
79
+ def anonymize_value(message, algorithm, salt)
80
+ case algorithm
81
+ when 'md5','sha1','sha256','sha384','sha512'
82
+ OpenSSL::HMAC.hexdigest(DIGEST[algorithm].call, salt, message.to_s)
83
+ when 'ipaddr_mask'
84
+ address = IPAddr.new(message)
85
+ subnet = address.ipv4? ? @ipv4_mask_subnet : @ipv6_mask_subnet
86
+ address.mask(subnet).to_s
87
+ else
88
+ log.warn "anonymizer: unknown algorithm #{algorithm} has called."
89
+ end
90
+ end
91
+
92
+ def have_tag_option?(plugin)
93
+ plugin.tag ||
94
+ plugin.remove_tag_prefix || plugin.remove_tag_suffix ||
95
+ plugin.add_tag_prefix || plugin.add_tag_suffix
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,26 @@
1
+ module Fluent
2
+ class AnonymizerFilter < Filter
3
+ Plugin.register_filter('anonymizer', self)
4
+
5
+ config_param :tag, :string, :default => nil
6
+ config_param :hash_salt, :string, :default => ''
7
+ config_param :ipv4_mask_subnet, :integer, :default => 24
8
+ config_param :ipv6_mask_subnet, :integer, :default => 104
9
+
10
+ config_set_default :include_tag_key, false
11
+
12
+ def initialize
13
+ super
14
+ require 'fluent/plugin/anonymizer'
15
+ end
16
+
17
+ def configure(conf)
18
+ super
19
+ @anonymizer = Anonymizer.new(self, conf)
20
+ end
21
+
22
+ def filter(tag, time, record)
23
+ record = @anonymizer.anonymize(record)
24
+ end
25
+ end
26
+ end
@@ -8,7 +8,6 @@ class Fluent::AnonymizerOutput < Fluent::Output
8
8
  define_method(:log) { $log }
9
9
  end
10
10
 
11
- HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512 ipaddr_mask)
12
11
  config_param :tag, :string, :default => nil
13
12
  config_param :hash_salt, :string, :default => ''
14
13
  config_param :ipv4_mask_subnet, :integer, :default => 24
@@ -19,90 +18,24 @@ class Fluent::AnonymizerOutput < Fluent::Output
19
18
  include Fluent::SetTagKeyMixin
20
19
  config_set_default :include_tag_key, false
21
20
 
22
- DIGEST = {
23
- "md5" => Proc.new { OpenSSL::Digest.new('md5') },
24
- "sha1" => Proc.new { OpenSSL::Digest.new('sha1') },
25
- "sha256" => Proc.new { OpenSSL::Digest.new('sha256') },
26
- "sha384" => Proc.new { OpenSSL::Digest.new('sha384') },
27
- "sha512" => Proc.new { OpenSSL::Digest.new('sha512') }
28
- }
29
-
30
21
  def initialize
31
- require 'openssl'
32
- require 'ipaddr'
22
+ require 'fluent/plugin/anonymizer'
33
23
  super
34
24
  end
35
25
 
36
26
  def configure(conf)
37
27
  super
38
-
39
- @hash_keys = Hash.new
40
- conf.keys.select{|k| k =~ /_keys$/}.each do |key|
41
- hash_algorithm_name = key.sub('_keys','')
42
- raise Fluent::ConfigError, "anonymizer: unsupported key #{hash_algorithm_name}" unless HASH_ALGORITHM.include?(hash_algorithm_name)
43
- conf[key].gsub(' ', '').split(',').each do |record_key|
44
- @hash_keys.store(record_key.split('.'), hash_algorithm_name)
45
- end
46
- end
47
-
48
- if @hash_keys.count < 1
49
- raise Fluent::ConfigError, "anonymizer: missing hash keys setting."
50
- end
51
- log.info "anonymizer: adding anonymize rules for each field. #{@hash_keys}"
52
-
53
- if ( !@tag && !@remove_tag_prefix && !@remove_tag_suffix && !@add_tag_prefix && !@add_tag_suffix )
54
- raise Fluent::ConfigError, "anonymizer: missing remove_tag_prefix, remove_tag_suffix, add_tag_prefix or add_tag_suffix."
55
- end
28
+ @anonymizer = Fluent::Anonymizer.new(self, conf)
56
29
  end
57
30
 
58
31
  def emit(tag, es, chain)
59
32
  es.each do |time, record|
60
- @hash_keys.each do |hash_key, hash_algorithm|
61
- record = filter_anonymize_record(record, hash_key, hash_algorithm)
62
- end
33
+ record = @anonymizer.anonymize(record)
63
34
  emit_tag = tag.dup
64
35
  filter_record(emit_tag, time, record)
65
36
  Fluent::Engine.emit(emit_tag, time, record)
66
37
  end
67
38
  chain.next
68
39
  end
69
-
70
- def filter_anonymize_record(record, key, hash_algorithm)
71
- if record.has_key?(key.first)
72
- if key.size == 1
73
- record[key.first] = filter_anonymize_value(record[key.first], hash_algorithm)
74
- else
75
- record[key.first] = filter_anonymize_record(record[key.first], key[1..-1], hash_algorithm)
76
- end
77
- end
78
- return record
79
- end
80
-
81
- def filter_anonymize_value(data, hash_algorithm)
82
- begin
83
- if data.is_a?(Array)
84
- data = data.collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
85
- else
86
- data = anonymize(data, hash_algorithm, @hash_salt)
87
- end
88
- rescue StandardError => e
89
- log.error "anonymizer: failed to anonymize record. :message=>#{e.message} :data=>#{data}"
90
- log.error e.backtrace.join("\n")
91
- end
92
- data
93
- end
94
-
95
- def anonymize(message, algorithm, salt)
96
- case algorithm
97
- when 'md5','sha1','sha256','sha384','sha512'
98
- OpenSSL::HMAC.hexdigest(DIGEST[algorithm].call, salt, message.to_s)
99
- when 'ipaddr_mask'
100
- address = IPAddr.new(message)
101
- subnet = address.ipv4? ? @ipv4_mask_subnet : @ipv6_mask_subnet
102
- address.mask(subnet).to_s
103
- else
104
- log.warn "anonymizer: unknown algorithm #{algorithm} has called."
105
- end
106
- end
107
40
  end
108
41
 
data/test/helper.rb CHANGED
@@ -23,6 +23,7 @@ unless ENV.has_key?('VERBOSE')
23
23
  end
24
24
 
25
25
  require 'fluent/plugin/out_anonymizer'
26
+ require 'fluent/plugin/filter_anonymizer'
26
27
 
27
28
  class Test::Unit::TestCase
28
29
  end
@@ -0,0 +1,172 @@
1
+ require 'helper'
2
+
3
+ class AnonymizerFilterTest < Test::Unit::TestCase
4
+ def setup
5
+ Fluent::Test.setup
6
+ @time = Fluent::Engine.now
7
+ end
8
+
9
+ CONFIG = %[
10
+ md5_keys data_for_md5
11
+ sha1_keys data_for_sha1
12
+ sha256_keys data_for_sha256
13
+ sha384_keys data_for_sha384
14
+ sha512_keys data_for_sha512
15
+ hash_salt test_salt_string
16
+ ipaddr_mask_keys host
17
+ ipv4_mask_subnet 24
18
+ ]
19
+
20
+ def create_driver(conf=CONFIG, tag='test')
21
+ Fluent::Test::FilterTestDriver.new(Fluent::AnonymizerFilter, tag).configure(conf)
22
+ end
23
+
24
+ def filter(conf, messages)
25
+ d = create_driver(conf)
26
+ d.run {
27
+ messages.each {|message|
28
+ d.filter(message, @time)
29
+ }
30
+ }
31
+ filtered = d.filtered_as_array
32
+ filtered.map {|m| m[2] }
33
+ end
34
+
35
+ def test_configure
36
+ assert_raise(Fluent::ConfigError) {
37
+ d = create_driver('')
38
+ }
39
+ assert_raise(Fluent::ConfigError) {
40
+ d = create_driver('unknown_keys')
41
+ }
42
+ d = create_driver(CONFIG)
43
+ assert_equal 'test_salt_string', d.instance.config['hash_salt']
44
+ end
45
+
46
+ def test_filter
47
+ messages = [
48
+ {
49
+ 'host' => '10.102.3.80',
50
+ 'data_for_md5' => '12345',
51
+ 'data_for_sha1' => '12345',
52
+ 'data_for_sha256' => '12345',
53
+ 'data_for_sha384' => '12345',
54
+ 'data_for_sha512' => '12345'
55
+ }
56
+ ]
57
+ expected = {
58
+ 'host' => '10.102.3.0',
59
+ 'data_for_md5' => 'e738cbde82a514dc60582cd467c240ed',
60
+ 'data_for_sha1' => '69cf099459c06b852ede96d39b710027727d13c6',
61
+ 'data_for_sha256' => '804d83b8c6a3e01498d40677652b084333196d8e548ee5a8710fbd0e1e115527',
62
+ 'data_for_sha384' => '6c90c389bbdfc210416b9318df3f526b4f218f8a8df3a67020353c35da22dc154460b18f22a8009a747b3ef2975acae7',
63
+ 'data_for_sha512' => 'cdbb897e6f3a092161bdb51164eb2996b75b00555f568219628ff15cd2929865d217af5dff9c32ddc908b75a89baec96b3e9a0da120e919f5246de0f1bc54c58'
64
+ }
65
+ filtered = filter(CONFIG, messages)
66
+ assert_equal(expected, filtered[0])
67
+ end
68
+
69
+ def test_filter_multi_keys
70
+ conf = %[
71
+ sha1_keys member_id, mail, telephone
72
+ ipaddr_mask_keys host, host2
73
+ ipv4_mask_subnet 16
74
+ ]
75
+ messages = [
76
+ {
77
+ 'host' => '10.102.3.80',
78
+ 'host2' => '10.102.3.80',
79
+ 'member_id' => '12345',
80
+ 'mail' => 'example@example.com',
81
+ 'telephone' => '00-0000-0000',
82
+ 'action' => 'signup'
83
+ }
84
+ ]
85
+ expected = {
86
+ 'host' => '10.102.0.0',
87
+ 'host2' => '10.102.0.0',
88
+ 'member_id' => '774472f0dc892f0b3299cae8dadacd0a74ba59d7',
89
+ 'mail' => 'd7b728209f5dd8df10cecbced30394c3c7fc2c82',
90
+ 'telephone' => 'a67f73c395105a358a03a0f127bf64b5495e7841',
91
+ 'action' => 'signup'
92
+ }
93
+ filtered = filter(conf, messages)
94
+ assert_equal(expected, filtered[0])
95
+ end
96
+
97
+ def test_filter_nested_keys
98
+ conf = %[
99
+ sha1_keys nested.data,nested.nested.data
100
+ ipaddr_mask_keys hosts.host1
101
+ ipv4_mask_subnet 16
102
+ ]
103
+ messages = [
104
+ {
105
+ 'hosts' => {
106
+ 'host1' => '10.102.3.80',
107
+ },
108
+ 'nested' => {
109
+ 'data' => '12345',
110
+ 'nested' => {
111
+ 'data' => '12345'
112
+ }
113
+ }
114
+ }
115
+ ]
116
+ expected = {
117
+ 'hosts' => {
118
+ 'host1' => '10.102.0.0'
119
+ },
120
+ 'nested' => {
121
+ 'data' => '774472f0dc892f0b3299cae8dadacd0a74ba59d7',
122
+ 'nested' => {
123
+ 'data' => '774472f0dc892f0b3299cae8dadacd0a74ba59d7'
124
+ }
125
+ }
126
+ }
127
+ filtered = filter(conf, messages)
128
+ assert_equal(expected, filtered[0])
129
+ end
130
+
131
+ def test_filter_nest_value
132
+ conf = %[
133
+ sha1_keys array,hash
134
+ ipaddr_mask_keys host
135
+ ]
136
+ messages = [
137
+ {
138
+ 'host' => '10.102.3.80',
139
+ 'array' => ['1000', '2000'],
140
+ 'hash' => {'foo' => '1000', 'bar' => '2000'},
141
+ }
142
+ ]
143
+ expected = {
144
+ 'host' => '10.102.3.0',
145
+ 'array' => ["c1628fc0d473cb21b15607c10bdcad19d1a42e24", "ea87abc249f9f2d430edb816514bffeffd3e698e"],
146
+ 'hash' => '28fe85deb0d1d39ee14c49c62bc4773b0338247b'
147
+ }
148
+ filtered = filter(conf, messages)
149
+ assert_equal(expected, filtered[0])
150
+ end
151
+
152
+ def test_filter_ipv6
153
+ conf = %[
154
+ ipaddr_mask_keys host
155
+ ipv4_mask_subnet 24
156
+ ipv6_mask_subnet 104
157
+ ]
158
+ messages = [
159
+ { 'host' => '10.102.3.80' },
160
+ { 'host' => '0:0:0:0:0:FFFF:129.144.52.38' },
161
+ { 'host' => '2001:db8:0:8d3:0:8a2e:70:7344' }
162
+ ]
163
+ expected = [
164
+ { 'host' => '10.102.3.0' },
165
+ { 'host' => '::ffff:129.0.0.0' },
166
+ { 'host' => '2001:db8:0:8d3:0:8a2e::' }
167
+ ]
168
+ filtered = filter(conf, messages)
169
+ assert_equal(expected, filtered)
170
+ end
171
+ end
172
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-anonymizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-12-08 00:00:00.000000000 Z
12
+ date: 2015-11-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -43,6 +43,22 @@ dependencies:
43
43
  - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: test-unit
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '3'
46
62
  - !ruby/object:Gem::Dependency
47
63
  name: fluentd
48
64
  requirement: !ruby/object:Gem::Requirement
@@ -89,8 +105,11 @@ files:
89
105
  - README.md
90
106
  - Rakefile
91
107
  - fluent-plugin-anonymizer.gemspec
108
+ - lib/fluent/plugin/anonymizer.rb
109
+ - lib/fluent/plugin/filter_anonymizer.rb
92
110
  - lib/fluent/plugin/out_anonymizer.rb
93
111
  - test/helper.rb
112
+ - test/plugin/test_filter_anonymizer.rb
94
113
  - test/plugin/test_out_anonymizer.rb
95
114
  homepage: https://github.com/y-ken/fluent-plugin-anonymizer
96
115
  licenses:
@@ -121,4 +140,5 @@ summary: Fluentd filter output plugin to anonymize records with HMAC of MD5/SHA1
121
140
  Phone number, IPv4/IPv6 address and so on.
122
141
  test_files:
123
142
  - test/helper.rb
143
+ - test/plugin/test_filter_anonymizer.rb
124
144
  - test/plugin/test_out_anonymizer.rb