fluent-plugin-anonymizer 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  ## Overview
4
4
 
5
- Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IP address and so on.
5
+ Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IPv4/IPv6 address and so on.
6
6
 
7
7
  ## Installation
8
8
 
@@ -18,7 +18,7 @@ gem install fluent-plugin-anonymizer
18
18
 
19
19
  #### configuration
20
20
 
21
- It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. For IP address, rounding number with 24bit netmask with `ipv4_mask_keys` and `ipv4_mask_subnet` option.
21
+ This sample hashes the `user_id`, `member_id` and `mail` fields with sha1. For IP addresses, it auto-detects IPv4/IPv6 and masks the address with a 24-bit (IPv4) or 104-bit (IPv6) netmask using the `ipaddr_mask_keys`, `ipv4_mask_subnet` and `ipv6_mask_subnet` options.
22
22
 
23
23
  `````
24
24
  <source>
@@ -29,8 +29,9 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
29
29
  <match test.message>
30
30
  type anonymizer
31
31
  sha1_keys user_id, member_id, mail
32
- ipv4_mask_keys host
32
+ ipaddr_mask_keys host
33
33
  ipv4_mask_subnet 24
34
+ ipv6_mask_subnet 104
34
35
  remove_tag_prefix test.
35
36
  add_tag_prefix anonymized.
36
37
  </match>
@@ -44,9 +45,11 @@ It is a sample to hash record with sha1 for `user_id`, `member_id` and `mail`. F
44
45
 
45
46
  `````
46
47
  $ echo '{"host":"10.102.3.80","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
48
+ $ echo '{"host":"2001:db8:0:8d3:0:8a2e:70:7344","member_id":"12345", "mail":"example@example.com"}' | fluent-cat test.message
47
49
 
48
50
  $ tail -f /var/log/td-agent/td-agent.log
49
- 2013-11-19 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
51
+ 2014-01-06 18:30:21 +0900 anonymized.message: {"host":"10.102.3.0","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
52
+ 2014-01-06 18:30:22 +0900 anonymized.message: {"host":"2001:db8:0:8d3:0:8a2e::","member_id":"8cb2237d0679ca88db6464eac60da96345513964","mail":"914fec35ce8bfa1a067581032f26b053591ee38a"}
50
53
  `````
51
54
 
52
55
  ## Parameters
@@ -61,8 +64,9 @@ This salt affects for `md5_keys` `sha1_keys` `sha256_keys` `sha384_keys` `sha512
61
64
  It is recommended to set a hash salt to prevent rainbow table attacks.
62
65
 
63
66
 
64
- * `ipv4_mask_keys`
67
+ * `ipaddr_mask_keys`
65
68
  * `ipv4_mask_subnet` (default: 24)
69
+ * `ipv6_mask_subnet` (default: 104)
66
70
 
67
71
  Round the number for one or more of the following keys. This makes aggregate calculation easier.
68
72
 
@@ -4,10 +4,10 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "fluent-plugin-anonymizer"
7
- spec.version = "0.0.3"
7
+ spec.version = "0.1.0"
8
8
  spec.authors = ["Kentaro Yoshida"]
9
9
  spec.email = ["y.ken.studio@gmail.com"]
10
- spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IP address and so on.}
10
+ spec.summary = %q{Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512 algorithms. This data masking plugin protects privacy data such as ID, email, phone number, IPv4/IPv6 address and so on.}
11
11
  spec.homepage = "https://github.com/y-ken/fluent-plugin-anonymizer"
12
12
  spec.license = "Apache License, Version 2.0"
13
13
 
@@ -1,9 +1,10 @@
1
1
  class Fluent::AnonymizerOutput < Fluent::Output
2
2
  Fluent::Plugin.register_output('anonymizer', self)
3
3
 
4
- HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512 ipv4_mask)
4
+ HASH_ALGORITHM = %w(md5 sha1 sha256 sha384 sha512 ipaddr_mask)
5
5
  config_param :hash_salt, :string, :default => ''
6
6
  config_param :ipv4_mask_subnet, :integer, :default => 24
7
+ config_param :ipv6_mask_subnet, :integer, :default => 104
7
8
 
8
9
  include Fluent::HandleTagNameMixin
9
10
 
@@ -39,6 +40,7 @@ class Fluent::AnonymizerOutput < Fluent::Output
39
40
  if @hash_keys.count < 1
40
41
  raise Fluent::ConfigError, "anonymizer: missing hash keys setting."
41
42
  end
43
+ $log.info "anonymizer: adding anonymize rules for each field. #{@hash_keys}"
42
44
 
43
45
  if ( !@remove_tag_prefix && !@remove_tag_suffix && !@add_tag_prefix && !@add_tag_suffix )
44
46
  raise Fluent::ConfigError, "anonymizer: missing remove_tag_prefix, remove_tag_suffix, add_tag_prefix or add_tag_suffix."
@@ -59,10 +61,15 @@ class Fluent::AnonymizerOutput < Fluent::Output
59
61
  end
60
62
 
61
63
  def filter_anonymize_record(data, hash_algorithm)
62
- if data.is_a?(Array)
63
- data = data.collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
64
- else
65
- data = anonymize(data, hash_algorithm, @hash_salt)
64
+ begin
65
+ if data.is_a?(Array)
66
+ data = data.collect { |v| anonymize(v, hash_algorithm, @hash_salt) }
67
+ else
68
+ data = anonymize(data, hash_algorithm, @hash_salt)
69
+ end
70
+ rescue StandardError => e
71
+ $log.error "anonymizer: failed to anonymize record. :message=>#{e.message} :data=>#{data}"
72
+ $log.error e.backtrace.join("\n")
66
73
  end
67
74
  data
68
75
  end
@@ -71,8 +78,10 @@ class Fluent::AnonymizerOutput < Fluent::Output
71
78
  case algorithm
72
79
  when 'md5','sha1','sha256','sha384','sha512'
73
80
  DIGEST[algorithm].call.hexdigest(salt + message.to_s)
74
- when 'ipv4_mask'
75
- IPAddr.new(message).mask(@ipv4_mask_subnet).to_s
81
+ when 'ipaddr_mask'
82
+ address = IPAddr.new(message)
83
+ subnet = address.ipv4? ? @ipv4_mask_subnet : @ipv6_mask_subnet
84
+ address.mask(subnet).to_s
76
85
  else
77
86
  $log.warn "anonymizer: unknown algorithm #{algorithm} has called."
78
87
  end
@@ -12,7 +12,7 @@ class AnonymizerOutputTest < Test::Unit::TestCase
12
12
  sha384_keys data_for_sha384
13
13
  sha512_keys data_for_sha512
14
14
  hash_salt test_salt_string
15
- ipv4_mask_keys host
15
+ ipaddr_mask_keys host
16
16
  ipv4_mask_subnet 24
17
17
  remove_tag_prefix input.
18
18
  add_tag_prefix anonymized.
@@ -20,7 +20,7 @@ class AnonymizerOutputTest < Test::Unit::TestCase
20
20
 
21
21
  CONFIG_MULTI_KEYS = %[
22
22
  sha1_keys member_id, mail, telephone
23
- ipv4_mask_keys host
23
+ ipaddr_mask_keys host
24
24
  ipv4_mask_subnet 16
25
25
  remove_tag_prefix input.
26
26
  add_tag_prefix anonymized.
@@ -28,7 +28,15 @@ class AnonymizerOutputTest < Test::Unit::TestCase
28
28
 
29
29
  CONFIG_NEST_VALUE = %[
30
30
  sha1_keys array,hash
31
- ipv4_mask_keys host
31
+ ipaddr_mask_keys host
32
+ remove_tag_prefix input.
33
+ add_tag_prefix anonymized.
34
+ ]
35
+
36
+ CONFIG_IPV6 = %[
37
+ ipaddr_mask_keys host
38
+ ipv4_mask_subnet 24
39
+ ipv6_mask_subnet 104
32
40
  remove_tag_prefix input.
33
41
  add_tag_prefix anonymized.
34
42
  ]
@@ -112,4 +120,20 @@ class AnonymizerOutputTest < Test::Unit::TestCase
112
120
  assert_equal ["e3cbba8883fe746c6e35783c9404b4bc0c7ee9eb", "a4ac914c09d7c097fe1f4f96b897e625b6922069"], emits[0][2]['array']
113
121
  assert_equal '1a1903d78aed9403649d61cb21ba6b489249761b', emits[0][2]['hash']
114
122
  end
123
+
124
+ def test_emit_ipv6
125
+ d1 = create_driver(CONFIG_IPV6, 'input.access')
126
+ d1.run do
127
+ d1.emit({'host' => '10.102.3.80'})
128
+ d1.emit({'host' => '0:0:0:0:0:FFFF:129.144.52.38'})
129
+ d1.emit({'host' => '2001:db8:0:8d3:0:8a2e:70:7344'})
130
+ end
131
+ emits = d1.emits
132
+ assert_equal 3, emits.length
133
+ p emits
134
+ assert_equal 'anonymized.access', emits[0][0] # tag
135
+ assert_equal '10.102.3.0', emits[0][2]['host']
136
+ assert_equal '::ffff:129.0.0.0', emits[1][2]['host']
137
+ assert_equal '2001:db8:0:8d3:0:8a2e::', emits[2][2]['host']
138
+ end
115
139
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-anonymizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-12-03 00:00:00.000000000 Z
12
+ date: 2014-01-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -102,7 +102,7 @@ signing_key:
102
102
  specification_version: 3
103
103
  summary: Fluentd filter output plugin to anonymize records with MD5/SHA1/SHA256/SHA384/SHA512
104
104
  algorithms. This data masking plugin protects privacy data such as ID, email, phone
105
- number, IP address and so on.
105
+ number, IPv4/IPv6 address and so on.
106
106
  test_files:
107
107
  - test/helper.rb
108
108
  - test/plugin/test_out_anonymizer.rb