fluent-plugin-sanitizer 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3e6b8ad479091eb0d7ce2f1539ec4966648907f638d5d8e5676d408ea0d4c3ad
4
- data.tar.gz: 21a0001319b6b8ec60b478bd60897a20fcca5a068879a0c73531ffb360cd576f
3
+ metadata.gz: 7a773fb4d2f1fb7c00d11982f92f15fc4803135af5a6a5c3433783fa12d0811c
4
+ data.tar.gz: 8ae5cc7723cab184885ebebd401824700c9ffde8fdf5ab81463f3b67be6aed95
5
5
  SHA512:
6
- metadata.gz: b30b9bec5a8ccc8bf16fcbd3f66bc7ecce7de97778521530eae226b10653b30505f07de47f37246851e6b727152bbd95488614e8d76c96fd0ac3bec3ac2848f0
7
- data.tar.gz: 3f31d9ace5c57a733a956e659e3c80c078e3f447d2b9d705c5d9f1e740be6eb214fd21c6750185f89d920b4cbbec51ac8d8f716fad8021a79286ed9e958bf7d0
6
+ metadata.gz: e5fe885cf146981c70346b7acd1126eed88cba19f090131f56fcdb19f0ef0f68c1a97d039105e59b997ef9662f2db73d8f6dad8407a056ca9fcdcf9da4dfa261
7
+ data.tar.gz: 87810f31b6f307779e785291ca1c4ca6d7c17b9cf62235e5a22bfdd99399e1f551c98cb0750add509a69f6ecf2b1134cb593c3175c46abf7b4f20cf894d821e5
data/README.md CHANGED
@@ -18,10 +18,11 @@ td-agent-gem install fluent-plugin-sanitizer
18
18
  - keys (mandatory) : Name of keys whose values will be masked. You can specify multiple keys. When keys are nested, you can use {parent key}.{child key} like "kubernetes.master_url".
19
19
  - pattern_ipv4 (optional) : Mask IP addresses in IPv4 format. You can use “true” or “false”. (default: false)
20
20
  - pattern_fqdn (optional) : Mask hostname in FQDN style. You can use “true” or “false”. (default: false)
21
- - pattern_regex (optional) : Mask value mactches custom regular expression. You need to provide a regular expression in these options.
22
- - pattern_regex_prefix (optional) : Define prefix used for masking vales. (default: Regex)
21
+ - pattern_regex (optional) : Mask values that match a custom regular expression.
22
+ - regex_capture_group (optional) : If you define a named capture group in the regular expression, you can specify the name of the capture group to be masked.
23
+ - pattern_regex_prefix (optional) : Define prefix used for masking values. (default: Regex)
23
24
  - pattern_keywords (optional) : Mask values that match custom keywords. You can specify multiple keywords.
24
- - pattern_keywords_prefix (optional) : Define prefix used for masking vales. (default: Keyword)
25
+ - pattern_keywords_prefix (optional) : Define prefix used for masking values. (default: Keywords)
25
26
 
26
27
  You can specify multiple rules in a single configuration. It is also possible to define multiple pattern options in a single rule like the following sample.
27
28
 
@@ -129,6 +130,34 @@ In case log messages including sensitive information such as SSN and phone numbe
129
130
  }
130
131
  }
131
132
  ```
133
+ From v0.1.2, the "regex_capture_group" option is available. With the "regex_capture_group" option, it is possible to mask a specific part of the original message.
134
+
135
+ **Configuration sample**
136
+ ```
137
+ <rule>
138
+ keys user.email
139
+ pattern_regex /(?<user>\w+)\@\w+.\w+/
140
+ regex_capture_group "user"
141
+ pattern_regex_prefix "USER"
142
+ </rule>
143
+ ```
144
+ **Input sample**
145
+ ```
146
+ {
147
+ "user" : {
148
+ "email" : "user1@demo.com"
149
+ }
150
+ }
151
+ ```
152
+ **Output sample**
153
+ ```
154
+ {
155
+ "user" : {
156
+ "email" : "USER_321865df6f0ce6bdf3ea16f74623534a@demo.com"
157
+ }
158
+ }
159
+ ```
160
+
132
161
  ### Tips : Debug how sanitizer works
133
162
  When you design custom rules in a configuration file, you might need information about how Sanitizer masks original values into hash values for debugging purposes. You can check that information if you run td-agent/Fluentd with debug option enabled. The debug information is shown in the log file of td-agent/Fluentd like the following log message sample.
134
163
 
@@ -3,7 +3,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
3
 
4
4
  Gem::Specification.new do |spec|
5
5
  spec.name = "fluent-plugin-sanitizer"
6
- spec.version = "0.1.2"
6
+ spec.version = "0.1.3"
7
7
  spec.authors = ["TK Kubota"]
8
8
  spec.email = ["tkubota@ctc-america.com"]
9
9
 
@@ -23,44 +23,58 @@ module Fluent
23
23
 
24
24
  helpers :event_emitter, :record_accessor
25
25
 
26
- desc "Hash salt to be used to generate hash values with MD5(optional)"
26
+ desc "Hash salt to be used to generate hash values with specified hash(optional)"
27
27
  config_param :hash_salt, :string, default: ""
28
-
29
- config_section :rule, param_name: :rules, multi: true do
30
- desc "Name of keys whose valuse are to be sanitized"
28
+
29
+ desc "Hash scheme to use for generating hash value (supported schemes are md5,sha1,sha256,sha384,sha512) (optional)"
30
+ config_param :hash_scheme, :enum, list: [:md5, :sha1, :sha256, :sha384, :sha512], default: :md5
31
+
32
+ config_section :rule, param_name: :rules, multi: true do
33
+ desc "Name of keys whose values are to be sanitized"
31
34
  config_param :keys, :array, default: []
32
35
  desc "Sanitize if values contain IPv4 (optional)"
33
36
  config_param :pattern_ipv4, :bool, default: false
34
- desc "Sanitize if values contain Hostname in FQDN style (ptional)"
37
+ desc "Sanitize if values contain Hostname in FQDN style (optional)"
35
38
  config_param :pattern_fqdn, :bool, default: false
36
- desc "Sanitize if values mactch custom regular expression (optional)"
39
+ desc "Sanitize if values match custom regular expression (optional)"
37
40
  config_param :pattern_regex, :regexp, default: /^$/
38
- desc "Prefix for pattern_regex (optional)"
39
- config_param :regex_capture_group, :string, default:""
40
41
  desc "Target capture group name to be masked (optional)"
42
+ config_param :regex_capture_group, :string, default:""
43
+ desc "Prefix for pattern_regex (optional)"
41
44
  config_param :pattern_regex_prefix, :string, default: "Regex"
42
- desc "Sanitize if values mactch custom keywords (optional)"
45
+ desc "Sanitize if values match custom keywords (optional)"
43
46
  config_param :pattern_keywords, :array, default: []
44
47
  desc "Prefix for pattern_keywords (optional)"
45
48
  config_param :pattern_keywords_prefix, :string, default: "Keywords"
46
49
  end
47
50
 
48
51
  def configure(conf)
49
- super
50
-
52
+ super
51
53
  @salt = conf['hash_salt']
52
-
54
+ @salt = "" if @salt.nil?
55
+ @hash_scheme = conf['hash_scheme']
56
+ @sanitize_func =
57
+ case @hash_scheme
58
+ when "sha1"
59
+ Proc.new { |str| Digest::SHA1.hexdigest(@salt + str) }
60
+ when "sha256"
61
+ Proc.new { |str| Digest::SHA256.hexdigest(@salt +str) }
62
+ when "sha384"
63
+ Proc.new { |str| Digest::SHA384.hexdigest(@salt +str) }
64
+ when "sha512"
65
+ Proc.new { |str| Digest::SHA512.hexdigest(@salt +str) }
66
+ else
67
+ Proc.new { |str| Digest::MD5.hexdigest(@salt +str) }
68
+ end
69
+
53
70
  @sanitizerules = []
54
71
  @rules.each do |rule|
55
72
  if rule.keys.empty?
56
73
  raise Fluent::ConfigError, "You need to specify at least one key in rule statement."
57
74
  else
58
- #keys = record_accessor_create(rule.keys)
59
75
  keys = rule.keys
60
76
  end
61
77
 
62
- #record_accessor_create(rule.keys)
63
-
64
78
  if rule.pattern_ipv4 || !rule.pattern_ipv4
65
79
  pattern_ipv4 = rule.pattern_ipv4
66
80
  else
@@ -77,16 +91,11 @@ module Fluent
77
91
  pattern_regex = rule.pattern_regex
78
92
  regex_capture_group = rule.regex_capture_group
79
93
  else
80
- raise Fluent::ConfigError, "Your need to specify Regexp for pattern_fqdn option."
81
- end
82
-
94
+ raise Fluent::ConfigError, "Your need to specify Regexp for pattern_regex option."
95
+ end
96
+
83
97
  pattern_keywords = rule.pattern_keywords
84
98
 
85
- case [pattern_ipv4, pattern_fqdn, pattern_regex, pattern_keywords.empty?]
86
- when [false, false, /^$/, true]
87
- raise Fluent::ConfigError, "You need to specify at least one pattern option in the rule statement."
88
- end
89
-
90
99
  regex_prefix = rule.pattern_regex_prefix
91
100
  keywords_prefix = rule.pattern_keywords_prefix
92
101
 
@@ -98,22 +107,27 @@ module Fluent
98
107
  @sanitizerules.each do |keys, pattern_ipv4, pattern_fqdn, pattern_regex, regex_capture_group, pattern_keywords, regex_prefix, keywords_prefix|
99
108
  keys.each do |key|
100
109
  accessor = record_accessor_create("$."+key.to_s)
101
- if pattern_ipv4 && accessor.call(record)
102
- accessor.set(record, sanitize_ipv4_val(accessor.call(record).to_s))
103
- end
104
- if pattern_fqdn && accessor.call(record)
105
- accessor.set(record, sanitize_fqdn_val(accessor.call(record).to_s))
106
- end
107
- if pattern_regex && accessor.call(record)
108
- if regex_capture_group.empty?
109
- accessor.set(record, sanitize_regex_val(accessor.call(record).to_s, regex_prefix, pattern_regex))
110
- else
111
- accessor.set(record, sanitize_regex_val_capture(accessor.call(record).to_s, regex_prefix, pattern_regex, regex_capture_group))
110
+ begin
111
+ if pattern_ipv4 && accessor.call(record)
112
+ accessor.set(record, sanitize_ipv4_val(accessor.call(record).to_s))
113
+ end
114
+ if pattern_fqdn && accessor.call(record)
115
+ accessor.set(record, sanitize_fqdn_val(accessor.call(record).to_s))
116
+ end
117
+ if !pattern_regex.to_s.eql?("(?-mix:^$)") && accessor.call(record)
118
+ if regex_capture_group.empty?
119
+ accessor.set(record, sanitize_regex_val(accessor.call(record), regex_prefix, pattern_regex))
120
+ else
121
+ accessor.set(record, sanitize_regex_val_capture(accessor.call(record), regex_prefix, pattern_regex, regex_capture_group))
122
+ end
123
+ #end
124
+ end
125
+ if !pattern_keywords.empty? && accessor.call(record)
126
+ accessor.set(record, sanitize_keywords_val(accessor.call(record).to_s, pattern_keywords, keywords_prefix))
127
+ end
128
+ rescue => e
129
+ log.warn "Skipping this key", error_class: e.class, error: e.message
112
130
  end
113
- end
114
- if !pattern_keywords.empty? && accessor.call(record)
115
- accessor.set(record, sanitize_keywords_val(accessor.call(record).to_s, pattern_keywords, keywords_prefix))
116
- end
117
131
  end
118
132
  end
119
133
  record
@@ -152,16 +166,32 @@ module Fluent
152
166
  end
153
167
 
154
168
  def sanitize_ipv4(str)
155
- return "IPv4_"+Digest::MD5.hexdigest(@salt + str)
169
+ return "IPv4_"+ @sanitize_func.call(str)
156
170
  end
157
171
 
158
172
  def sanitize_fqdn(str)
159
- return "FQDN_"+Digest::MD5.hexdigest(@salt + str)
173
+ return "FQDN_"+ @sanitize_func.call(str)
174
+ end
175
+
176
+ def sanitize_val(str, prefix)
177
+ s = prefix + "_" + @sanitize_func.call(str)
178
+ $log.debug "[pattern_regex] sanitize '#{str}' to '#{s}'" if str != s
179
+ return s
160
180
  end
161
181
 
162
182
  def sanitize_regex(str, prefix, regex)
163
- if str.to_s.match?(regex)
164
- return prefix + "_" + Digest::MD5.hexdigest(@salt + str)
183
+ regex_p = Regexp.new(regex)
184
+ if str =~ regex_p
185
+ scans = str.scan(regex).flatten
186
+ if scans.any?{ |e| e.nil? }
187
+ return prefix + "_" + @sanitize_func.call(str)
188
+ else
189
+ scans.each do |s|
190
+ mask = prefix + "_" + @sanitize_func.call(str)
191
+ str = str.gsub(s, mask)
192
+ end
193
+ end
194
+ return str
165
195
  else
166
196
  $log.debug "[pattern_regex] #{str} does not match given regex #{regex}. skip this rule."
167
197
  return str
@@ -169,11 +199,15 @@ module Fluent
169
199
  end
170
200
 
171
201
  def sanitize_regex_capture(str, prefix, regex, capture_group)
172
- if str.match?(regex)
202
+ regex_p = Regexp.new(regex)
203
+ if str =~ regex_p
173
204
  if str.match(regex).names.include?(capture_group)
174
- cg = str.match(regex)[capture_group]
175
- mask = prefix + "_" + Digest::MD5.hexdigest(@salt + cg)
176
- return str.split(cg)[0] + mask + str.split(cg)[1]
205
+ scans = str.scan(regex).flatten
206
+ scans.each do |s|
207
+ mask = prefix + "_" + @sanitize_func.call(str)
208
+ str = str.gsub(s, mask)
209
+ end
210
+ return str
177
211
  else
178
212
  $log.debug "[pattern_regex] regex pattern matched but capture group '#{capture_group}' does not exist. Skip this rule."
179
213
  return str
@@ -185,7 +219,7 @@ module Fluent
185
219
  end
186
220
 
187
221
  def sanitize_keyword(str, prefix)
188
- return prefix + "_" + Digest::MD5.hexdigest(@salt + str)
222
+ return prefix + "_" + @sanitize_func.call(str)
189
223
  end
190
224
 
191
225
  def sanitize_ipv4_port(str)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-sanitizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - TK Kubota
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-12 00:00:00.000000000 Z
11
+ date: 2022-11-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler