fluent-plugin-sanitizer 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +32 -3
- data/fluent-plugin-sanitizer.gemspec +1 -1
- data/lib/fluent/plugin/filter_sanitizer.rb +81 -47
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a773fb4d2f1fb7c00d11982f92f15fc4803135af5a6a5c3433783fa12d0811c
|
4
|
+
data.tar.gz: 8ae5cc7723cab184885ebebd401824700c9ffde8fdf5ab81463f3b67be6aed95
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e5fe885cf146981c70346b7acd1126eed88cba19f090131f56fcdb19f0ef0f68c1a97d039105e59b997ef9662f2db73d8f6dad8407a056ca9fcdcf9da4dfa261
|
7
|
+
data.tar.gz: 87810f31b6f307779e785291ca1c4ca6d7c17b9cf62235e5a22bfdd99399e1f551c98cb0750add509a69f6ecf2b1134cb593c3175c46abf7b4f20cf894d821e5
|
data/README.md
CHANGED
@@ -18,10 +18,11 @@ td-agent-gem install fluent-plugin-sanitizer
|
|
18
18
|
- keys (mandatory) : Name of keys whose values will be masked. You can specify multiple keys. When keys are nested, you can use {parent key}.{child key} like "kubernetes.master_url".
|
19
19
|
- pattern_ipv4 (optional) : Mask IP addresses in IPv4 format. You can use “true” or “false”. (default: false)
|
20
20
|
- pattern_fqdn (optional) : Mask hostnames in FQDN style. You can use “true” or “false”. (default: false)
|
21
|
-
- pattern_regex (optional) : Mask values that match a custom regular expression.
|
22
|
-
|
21
|
+
- pattern_regex (optional) : Mask values that match a custom regular expression.
|
22
|
+
- regex_capture_group (optional) : If you define a named capture group in the regular expression, you can specify the name of the capture group to be masked.
|
23
|
+
- pattern_regex_prefix (optional) : Define the prefix used for masking values. (default: Regex)
|
23
24
|
- pattern_keywords (optional) : Mask values that match custom keywords. You can specify multiple keywords.
|
24
|
-
|
25
|
+
- pattern_keywords_prefix (optional) : Define the prefix used for masking values. (default: Keywords)
|
25
26
|
|
26
27
|
You can specify multiple rules in a single configuration. It is also possible to define multiple pattern options in a single rule like the following sample.
|
27
28
|
|
@@ -129,6 +130,34 @@ In case log messages including sensitive information such as SSN and phone numbe
|
|
129
130
|
}
|
130
131
|
}
|
131
132
|
```
|
133
|
+
From v0.1.2, the "regex_capture_group" option is available. With the "regex_capture_group" option, it is possible to mask only a specific part of the original message.
|
134
|
+
|
135
|
+
**Configuration sample**
|
136
|
+
```
|
137
|
+
<rule>
|
138
|
+
keys user.email
|
139
|
+
pattern_regex /(?<user>\w+)\@\w+.\w+/
|
140
|
+
regex_capture_group "user"
|
141
|
+
pattern_regex_prefix "USER"
|
142
|
+
</rule>
|
143
|
+
```
|
144
|
+
**Input sample**
|
145
|
+
```
|
146
|
+
{
|
147
|
+
"user" : {
|
148
|
+
"email" : "user1@demo.com"
|
149
|
+
}
|
150
|
+
}
|
151
|
+
```
|
152
|
+
**Output sample**
|
153
|
+
```
|
154
|
+
{
|
155
|
+
"user" : {
|
156
|
+
"email" : "USER_321865df6f0ce6bdf3ea16f74623534a@demo.com"
|
157
|
+
}
|
158
|
+
}
|
159
|
+
```
|
160
|
+
|
132
161
|
### Tips : Debug how sanitizer works
|
133
162
|
When you design custom rules in a configuration file, you might need information about how Sanitizer masks original values into hash values for debugging purposes. You can check that information if you run td-agent/Fluentd with debug option enabled. The debug information is shown in the log file of td-agent/Fluentd like the following log message sample.
|
134
163
|
|
@@ -23,44 +23,58 @@ module Fluent
|
|
23
23
|
|
24
24
|
helpers :event_emitter, :record_accessor
|
25
25
|
|
26
|
-
desc "Hash salt to be used to generate hash values with
|
26
|
+
desc "Hash salt to be used to generate hash values with specified hash(optional)"
|
27
27
|
config_param :hash_salt, :string, default: ""
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
|
29
|
+
desc "Hash scheme to use for generating hash value (supported schemes are md5,sha1,sha256,sha384,sha512) (optional)"
|
30
|
+
config_param :hash_scheme, :enum, list: [:md5, :sha1, :sha256, :sha384, :sha512], default: :md5
|
31
|
+
|
32
|
+
config_section :rule, param_name: :rules, multi: true do
|
33
|
+
desc "Name of keys whose values are to be sanitized"
|
31
34
|
config_param :keys, :array, default: []
|
32
35
|
desc "Sanitize if values contain IPv4 (optional)"
|
33
36
|
config_param :pattern_ipv4, :bool, default: false
|
34
|
-
desc "Sanitize if values contain Hostname in FQDN style (
|
37
|
+
desc "Sanitize if values contain Hostname in FQDN style (optional)"
|
35
38
|
config_param :pattern_fqdn, :bool, default: false
|
36
|
-
desc "Sanitize if values
|
39
|
+
desc "Sanitize if values match custom regular expression (optional)"
|
37
40
|
config_param :pattern_regex, :regexp, default: /^$/
|
38
|
-
desc "Prefix for pattern_regex (optional)"
|
39
|
-
config_param :regex_capture_group, :string, default:""
|
40
41
|
desc "Target capture group name to be masked (optional)"
|
42
|
+
config_param :regex_capture_group, :string, default:""
|
43
|
+
desc "Prefix for pattern_regex (optional)"
|
41
44
|
config_param :pattern_regex_prefix, :string, default: "Regex"
|
42
|
-
desc "Sanitize if values
|
45
|
+
desc "Sanitize if values match custom keywords (optional)"
|
43
46
|
config_param :pattern_keywords, :array, default: []
|
44
47
|
desc "Prefix for pattern_keywords (optional)"
|
45
48
|
config_param :pattern_keywords_prefix, :string, default: "Keywords"
|
46
49
|
end
|
47
50
|
|
48
51
|
def configure(conf)
|
49
|
-
super
|
50
|
-
|
52
|
+
super
|
51
53
|
@salt = conf['hash_salt']
|
52
|
-
|
54
|
+
@salt = "" if @salt.nil?
|
55
|
+
@hash_scheme = conf['hash_scheme']
|
56
|
+
@sanitize_func =
|
57
|
+
case @hash_scheme
|
58
|
+
when "sha1"
|
59
|
+
Proc.new { |str| Digest::SHA1.hexdigest(@salt + str) }
|
60
|
+
when "sha256"
|
61
|
+
Proc.new { |str| Digest::SHA256.hexdigest(@salt +str) }
|
62
|
+
when "sha384"
|
63
|
+
Proc.new { |str| Digest::SHA384.hexdigest(@salt +str) }
|
64
|
+
when "sha512"
|
65
|
+
Proc.new { |str| Digest::SHA512.hexdigest(@salt +str) }
|
66
|
+
else
|
67
|
+
Proc.new { |str| Digest::MD5.hexdigest(@salt +str) }
|
68
|
+
end
|
69
|
+
|
53
70
|
@sanitizerules = []
|
54
71
|
@rules.each do |rule|
|
55
72
|
if rule.keys.empty?
|
56
73
|
raise Fluent::ConfigError, "You need to specify at least one key in rule statement."
|
57
74
|
else
|
58
|
-
#keys = record_accessor_create(rule.keys)
|
59
75
|
keys = rule.keys
|
60
76
|
end
|
61
77
|
|
62
|
-
#record_accessor_create(rule.keys)
|
63
|
-
|
64
78
|
if rule.pattern_ipv4 || !rule.pattern_ipv4
|
65
79
|
pattern_ipv4 = rule.pattern_ipv4
|
66
80
|
else
|
@@ -77,16 +91,11 @@ module Fluent
|
|
77
91
|
pattern_regex = rule.pattern_regex
|
78
92
|
regex_capture_group = rule.regex_capture_group
|
79
93
|
else
|
80
|
-
raise Fluent::ConfigError, "Your need to specify Regexp for
|
81
|
-
end
|
82
|
-
|
94
|
+
raise Fluent::ConfigError, "Your need to specify Regexp for pattern_regex option."
|
95
|
+
end
|
96
|
+
|
83
97
|
pattern_keywords = rule.pattern_keywords
|
84
98
|
|
85
|
-
case [pattern_ipv4, pattern_fqdn, pattern_regex, pattern_keywords.empty?]
|
86
|
-
when [false, false, /^$/, true]
|
87
|
-
raise Fluent::ConfigError, "You need to specify at least one pattern option in the rule statement."
|
88
|
-
end
|
89
|
-
|
90
99
|
regex_prefix = rule.pattern_regex_prefix
|
91
100
|
keywords_prefix = rule.pattern_keywords_prefix
|
92
101
|
|
@@ -98,22 +107,27 @@ module Fluent
|
|
98
107
|
@sanitizerules.each do |keys, pattern_ipv4, pattern_fqdn, pattern_regex, regex_capture_group, pattern_keywords, regex_prefix, keywords_prefix|
|
99
108
|
keys.each do |key|
|
100
109
|
accessor = record_accessor_create("$."+key.to_s)
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
110
|
+
begin
|
111
|
+
if pattern_ipv4 && accessor.call(record)
|
112
|
+
accessor.set(record, sanitize_ipv4_val(accessor.call(record).to_s))
|
113
|
+
end
|
114
|
+
if pattern_fqdn && accessor.call(record)
|
115
|
+
accessor.set(record, sanitize_fqdn_val(accessor.call(record).to_s))
|
116
|
+
end
|
117
|
+
if !pattern_regex.to_s.eql?("(?-mix:^$)") && accessor.call(record)
|
118
|
+
if regex_capture_group.empty?
|
119
|
+
accessor.set(record, sanitize_regex_val(accessor.call(record), regex_prefix, pattern_regex))
|
120
|
+
else
|
121
|
+
accessor.set(record, sanitize_regex_val_capture(accessor.call(record), regex_prefix, pattern_regex, regex_capture_group))
|
122
|
+
end
|
123
|
+
#end
|
124
|
+
end
|
125
|
+
if !pattern_keywords.empty? && accessor.call(record)
|
126
|
+
accessor.set(record, sanitize_keywords_val(accessor.call(record).to_s, pattern_keywords, keywords_prefix))
|
127
|
+
end
|
128
|
+
rescue => e
|
129
|
+
log.warn "Skipping this key", error_class: e.class, error: e.message
|
112
130
|
end
|
113
|
-
end
|
114
|
-
if !pattern_keywords.empty? && accessor.call(record)
|
115
|
-
accessor.set(record, sanitize_keywords_val(accessor.call(record).to_s, pattern_keywords, keywords_prefix))
|
116
|
-
end
|
117
131
|
end
|
118
132
|
end
|
119
133
|
record
|
@@ -152,16 +166,32 @@ module Fluent
|
|
152
166
|
end
|
153
167
|
|
154
168
|
def sanitize_ipv4(str)
|
155
|
-
return "IPv4_"+
|
169
|
+
return "IPv4_"+ @sanitize_func.call(str)
|
156
170
|
end
|
157
171
|
|
158
172
|
def sanitize_fqdn(str)
|
159
|
-
return "FQDN_"+
|
173
|
+
return "FQDN_"+ @sanitize_func.call(str)
|
174
|
+
end
|
175
|
+
|
176
|
+
def sanitize_val(str, prefix)
|
177
|
+
s = prefix + "_" + @sanitize_func.call(str)
|
178
|
+
$log.debug "[pattern_regex] sanitize '#{str}' to '#{s}'" if str != s
|
179
|
+
return s
|
160
180
|
end
|
161
181
|
|
162
182
|
def sanitize_regex(str, prefix, regex)
|
163
|
-
|
164
|
-
|
183
|
+
regex_p = Regexp.new(regex)
|
184
|
+
if str =~ regex_p
|
185
|
+
scans = str.scan(regex).flatten
|
186
|
+
if scans.any?{ |e| e.nil? }
|
187
|
+
return prefix + "_" + @sanitize_func.call(str)
|
188
|
+
else
|
189
|
+
scans.each do |s|
|
190
|
+
mask = prefix + "_" + @sanitize_func.call(str)
|
191
|
+
str = str.gsub(s, mask)
|
192
|
+
end
|
193
|
+
end
|
194
|
+
return str
|
165
195
|
else
|
166
196
|
$log.debug "[pattern_regex] #{str} does not match given regex #{regex}. skip this rule."
|
167
197
|
return str
|
@@ -169,11 +199,15 @@ module Fluent
|
|
169
199
|
end
|
170
200
|
|
171
201
|
def sanitize_regex_capture(str, prefix, regex, capture_group)
|
172
|
-
|
202
|
+
regex_p = Regexp.new(regex)
|
203
|
+
if str =~ regex_p
|
173
204
|
if str.match(regex).names.include?(capture_group)
|
174
|
-
|
175
|
-
|
176
|
-
|
205
|
+
scans = str.scan(regex).flatten
|
206
|
+
scans.each do |s|
|
207
|
+
mask = prefix + "_" + @sanitize_func.call(str)
|
208
|
+
str = str.gsub(s, mask)
|
209
|
+
end
|
210
|
+
return str
|
177
211
|
else
|
178
212
|
$log.debug "[pattern_regex] regex pattern matched but capture group '#{capture_group}' does not exist. Skip this rule."
|
179
213
|
return str
|
@@ -185,7 +219,7 @@ module Fluent
|
|
185
219
|
end
|
186
220
|
|
187
221
|
def sanitize_keyword(str, prefix)
|
188
|
-
return prefix + "_" +
|
222
|
+
return prefix + "_" + @sanitize_func.call(str)
|
189
223
|
end
|
190
224
|
|
191
225
|
def sanitize_ipv4_port(str)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TK Kubota
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-11-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|